git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748593 13f79535-47bb-0310-9956-ffa450edef68master
@@ -702,6 +702,10 @@ Other changes: | |||||
* CBZip2OutputStream now has a finish method separate from close. | * CBZip2OutputStream now has a finish method separate from close. | ||||
Bugzilla Report 42713. | Bugzilla Report 42713. | ||||
* the <zip> and <unzip> family of tasks has new options to deal with | |||||
file name and comment encoding. Please see the zip tasks' | |||||
documentation for details. | |||||
Changes from Ant 1.7.0 TO Ant 1.7.1 | Changes from Ant 1.7.0 TO Ant 1.7.1 | ||||
============================================= | ============================================= | ||||
@@ -83,7 +83,9 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
<td valign="top">The character encoding to use for filenames | <td valign="top">The character encoding to use for filenames | ||||
inside the archive. Defaults to UTF8. <strong>It is not | inside the archive. Defaults to UTF8. <strong>It is not | ||||
recommended to change this value as the created archive will most | recommended to change this value as the created archive will most | ||||
likely be unreadable for Java otherwise.</strong></td> | |||||
likely be unreadable for Java otherwise.</strong> | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
</tr> | </tr> | ||||
<tr> | <tr> | ||||
@@ -197,6 +199,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
</td> | </td> | ||||
<td valign="top" align="center">No, default is false</td> | <td valign="top" align="center">No, default is false</td> | ||||
</tr> | </tr> | ||||
<tr> | |||||
<td valign="top">useLanguageEncodingFlag</td> | |||||
<td valign="top">Whether to set the language encoding flag if the | |||||
encoding is UTF-8. This setting doesn't have any effect if the | |||||
encoding is not UTF-8. | |||||
<em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td valign="top" align="center">No, default is true</td> | |||||
</tr> | |||||
<tr> | |||||
<td valign="top">createUnicodeExtraFields</td> | |||||
<td valign="top">Whether to create unicode extra fields to store | |||||
the file names a second time inside the entry's metadata. | |||||
<em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td valign="top" align="center">No, default is false</td> | |||||
</tr> | |||||
</table> | </table> | ||||
<h3>Nested elements</h3> | <h3>Nested elements</h3> | ||||
@@ -125,8 +125,10 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
<td valign="top">encoding</td> | <td valign="top">encoding</td> | ||||
<td valign="top">The character encoding to use for filenames | <td valign="top">The character encoding to use for filenames | ||||
inside the archive. Defaults to UTF8. <strong>It is not | inside the archive. Defaults to UTF8. <strong>It is not | ||||
recommended to change this value as the created archive will most | |||||
likely be unreadable for Java otherwise.</strong></td> | |||||
recommended to change this value as the created archive will | |||||
most likely be unreadable for Java otherwise.</strong> | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
</tr> | </tr> | ||||
<tr> | <tr> | ||||
@@ -251,6 +253,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
</td> | </td> | ||||
<td valign="top" align="center">No, default is false</td> | <td valign="top" align="center">No, default is false</td> | ||||
</tr> | </tr> | ||||
<tr> | |||||
<td valign="top">useLanguageEncodingFlag</td> | |||||
<td valign="top">Whether to set the language encoding flag if the | |||||
encoding is UTF-8. This setting doesn't have any effect if the | |||||
encoding is not UTF-8. | |||||
<em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td valign="top" align="center">No, default is true</td> | |||||
</tr> | |||||
<tr> | |||||
<td valign="top">createUnicodeExtraFields</td> | |||||
<td valign="top">Whether to create unicode extra fields to store | |||||
the file names a second time inside the entry's metadata. | |||||
<em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td valign="top" align="center">No, default is false</td> | |||||
</tr> | |||||
</table> | </table> | ||||
<h3>Nested elements</h3> | <h3>Nested elements</h3> | ||||
@@ -107,7 +107,9 @@ archive.</p> | |||||
href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.<br> | href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.<br> | ||||
Defaults to "UTF8", use the magic value | Defaults to "UTF8", use the magic value | ||||
<code>native-encoding</code> for the platform's default character | <code>native-encoding</code> for the platform's default character | ||||
encoding.</td> | |||||
encoding. | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
</tr> | </tr> | ||||
<tr> | <tr> | ||||
@@ -125,6 +127,16 @@ archive.</p> | |||||
any). <em>since Ant 1.8.0</em></td> | any). <em>since Ant 1.8.0</em></td> | ||||
<td valign="top" align="center">No, defaults to false</td> | <td valign="top" align="center">No, defaults to false</td> | ||||
</tr> | </tr> | ||||
<tr> | |||||
<td valign="top">scanForUnicodeExtraFields</td> | |||||
<td valign="top"><b>Note:</b> This attribute is not available for | |||||
the <code>untar</code> task.<br> | |||||
If the archive contains unicode extra fields then use them to set | |||||
the file names, ignoring the specified encoding. | |||||
<em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td align="center" valign="top">No, defaults to true</td> | |||||
</tr> | |||||
</table> | </table> | ||||
<h3>Examples</h3> | <h3>Examples</h3> | ||||
<pre> | <pre> | ||||
@@ -116,7 +116,9 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
<td valign="top">The character encoding to use for filenames | <td valign="top">The character encoding to use for filenames | ||||
inside the archive. Defaults to UTF8. <strong>It is not | inside the archive. Defaults to UTF8. <strong>It is not | ||||
recommended to change this value as the created archive will most | recommended to change this value as the created archive will most | ||||
likely be unreadable for Java otherwise.</strong></td> | |||||
likely be unreadable for Java otherwise.</strong> | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
</tr> | </tr> | ||||
<tr> | <tr> | ||||
@@ -214,6 +216,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||||
</td> | </td> | ||||
<td valign="top" align="center">No, default is false</td> | <td valign="top" align="center">No, default is false</td> | ||||
</tr> | </tr> | ||||
<tr> | |||||
<td valign="top">useLanguageEncodingFlag</td> | |||||
<td valign="top">Whether to set the language encoding flag if the | |||||
encoding is UTF-8. This setting doesn't have any effect if the | |||||
encoding is not UTF-8. | |||||
<em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td valign="top" align="center">No, default is true</td> | |||||
</tr> | |||||
<tr> | |||||
<td valign="top">createUnicodeExtraFields</td> | |||||
<td valign="top">Whether to create unicode extra fields to store | |||||
the file names a second time inside the entry's metadata. | |||||
<em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||||
zip task page</a></td> | |||||
<td valign="top" align="center">No, default is false</td> | |||||
</tr> | |||||
</table> | </table> | ||||
<h3>Nested elements</h3> | <h3>Nested elements</h3> | ||||
@@ -74,7 +74,8 @@ for filenames - this is consistent with the command line ZIP tools, | |||||
but causes problems if you try to open them from within Java and your | but causes problems if you try to open them from within Java and your | ||||
filenames contain non US-ASCII characters. Use the encoding attribute | filenames contain non US-ASCII characters. Use the encoding attribute | ||||
and set it to UTF8 to create zip files that can safely be read by | and set it to UTF8 to create zip files that can safely be read by | ||||
Java.</p> | |||||
Java. For a more complete discussion, | |||||
see <a href="#encoding">below</a></p> | |||||
<p>Starting with Ant 1.5.2, <code><zip></code> can store Unix permissions | <p>Starting with Ant 1.5.2, <code><zip></code> can store Unix permissions | ||||
inside the archive (see description of the filemode and dirmode | inside the archive (see description of the filemode and dirmode | ||||
@@ -149,7 +150,8 @@ archive.</p> | |||||
<td valign="top">The character encoding to use for filenames | <td valign="top">The character encoding to use for filenames | ||||
inside the zip file. For a list of possible values see <a | inside the zip file. For a list of possible values see <a | ||||
href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. | href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. | ||||
Defaults to the platform's default character encoding.</td> | |||||
Defaults to the platform's default character encoding. | |||||
<br/>See also the <a href="#encoding">discussion below</a></td> | |||||
<td align="center" valign="top">No</td> | <td align="center" valign="top">No</td> | ||||
</tr> | </tr> | ||||
<tr> | <tr> | ||||
@@ -241,7 +243,127 @@ archive.</p> | |||||
</td> | </td> | ||||
<td valign="top" align="center">No, default is false</td> | <td valign="top" align="center">No, default is false</td> | ||||
</tr> | </tr> | ||||
<tr> | |||||
<td valign="top">useLanguageEncodingFlag</td> | |||||
<td valign="top">Whether to set the language encoding flag if the | |||||
encoding is UTF-8. This setting doesn't have any effect if the | |||||
encoding is not UTF-8. | |||||
<em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="#encoding">discussion below</a></td> | |||||
<td align="center" valign="top">No, default is true</td> | |||||
</tr> | |||||
<tr> | |||||
<td valign="top">createUnicodeExtraFields</td> | |||||
<td valign="top">Whether to create unicode extra fields to store | |||||
the file names a second time inside the entry's metadata. | |||||
Defaults to false. <em>Since Ant 1.8.0</em>. | |||||
<br/>See also the <a href="#encoding">discussion below</a></td> | |||||
<td align="center" valign="top">No, default is false</td> | |||||
</tr> | |||||
</table> | </table> | ||||
<h3><a name="encoding">Encoding of File Names</a></h3> | |||||
<p>Traditionally the ZIP archive format uses CodePage 437 as encoding | |||||
for file names, which is not sufficient for many international | |||||
character sets.</p> | |||||
<p>Over time different archivers have chosen different ways to work | |||||
around the limitation - the <code>java.util.zip</code> package | |||||
simply uses UTF-8 as its encoding, for example.</p> | |||||
<p>Ant has been offering the encoding attribute of the zip and unzip | |||||
task as a way to explicitly specify the encoding to use (or expect) | |||||
since Ant 1.4. It defaults to the platform's default encoding for | |||||
zip and UTF-8 for jar and other jar-like tasks (war, ear, ...) as | |||||
well as the unzip family of tasks.</p> | |||||
<p>More recent versions of the ZIP specification introduce something | |||||
called the "language encoding flag" which can be used to | |||||
signal that a file name has been encoded using UTF-8. Starting with | |||||
Ant 1.8.0 all zip-/jar- and similar archives written by Ant will set | |||||
this flag, if the encoding has been set to UTF-8. Our | |||||
interoperability tests with existing archivers didn't show any ill | |||||
effects (in fact, most archivers ignore the flag to date), but you | |||||
can turn off the "language encoding flag" by setting the attribute | |||||
<code>useLanguageEncodingFlag</code> to <code>false</code> on the | |||||
zip-task if you should encounter problems.</p> | |||||
<p>The unzip task (and similar tasks) will recognize the language | |||||
encoding flag and ignore the encoding set on the task if the flag | |||||
has been found.</p> | |||||
<p>The InfoZIP developers have introduced new ZIP extra fields that | |||||
can be used to add an additional UTF-8 encoded file name to the | |||||
entry's metadata. Most archivers ignore these extra fields. The | |||||
zip family of tasks supports an | |||||
option <code>createUnicodeExtraFields</code> since Ant 1.8.0 which | |||||
makes Ant write these extra fields; it defaults to false since it | |||||
creates a bigger archive.</p> | |||||
<p>The unzip-task will recognize the unicode extra fields by default | |||||
and read the file name information from them, unless you set the | |||||
optional attribute <code>scanForUnicodeExtraFields</code> to | |||||
false.</p> | |||||
<h4>Recommendations for Interoperability</h4> | |||||
<p>The optimal setting of flags depends on the archivers you expect as | |||||
consumers/producers of the ZIP archives. Below are some test | |||||
results which may be superseded by later versions of each | |||||
tool.</p> | |||||
<ul> | |||||
<li>The java.util.zip package used by the jar executable or to read | |||||
jars from your CLASSPATH reads and writes UTF-8 names, it doesn't | |||||
set or recognize any flags or unicode extra fields.</li> | |||||
<li>7Zip writes CodePage 437 by default but uses UTF-8 and the | |||||
language encoding flag when writing entries that cannot be encoded | |||||
as CodePage 437. It recognizes the language encoding flag when | |||||
reading and ignores the unicode extra fields.</li> | |||||
<li>WinZIP writes CodePage 437 and uses unicode extra fields by | |||||
default. It recognizes the unicode extra field when reading and | |||||
ignores the language encoding flag.</li> | |||||
<li>Windows' "compressed folder" feature doesn't recognize any flag | |||||
or extra field and creates archives using the platform's default | |||||
encoding - and expects archives to be in that encoding when reading | |||||
them.</li> | |||||
<li>InfoZIP based tools can recognize and write both, it is a | |||||
compile time option and depends on the platform so your mileage | |||||
may vary.</li> | |||||
<li>PKWARE zip tools recognize both and prefer the language encoding | |||||
flag. They create archives using CodePage 437 if possible and UTF-8 | |||||
plus the language encoding flag for file names that cannot be | |||||
encoded as CodePage 437.</li> | |||||
</ul> | |||||
<p>So, what to do?</p> | |||||
<p>If you are creating jars, then java.util.zip is your main | |||||
consumer. We recommend you set the encoding to UTF-8 and keep the | |||||
language encoding flag enabled. The flag won't help or hurt | |||||
java.util.zip but archivers that support it will show the correct | |||||
file names.</p> | |||||
<p>For maximum interop it is probably best to set the encoding to | |||||
UTF-8, enable the language encoding flag and create unicode extra | |||||
fields when writing ZIPs. Such archives should be extracted | |||||
correctly by java.util.zip, 7Zip, WinZIP, PKWARE tools and most | |||||
likely InfoZIP tools. They will be unusable with Windows' | |||||
"compressed folders" feature and bigger than archives without the | |||||
unicode extra fields, though.</p> | |||||
<p>If Windows' "compressed folders" is your primary consumer, then | |||||
your best option is to explicitly set the encoding to the target | |||||
platform. You may want to enable creation of unicode extra fields | |||||
so the tools that support them will extract the file names | |||||
correctly.</p> | |||||
<h3>Parameters specified as nested elements</h3> | <h3>Parameters specified as nested elements</h3> | ||||
<h4>any resource collection</h4> | <h4>any resource collection</h4> | ||||
@@ -68,6 +68,7 @@ public class Expand extends Task { | |||||
private boolean resourcesSpecified = false; | private boolean resourcesSpecified = false; | ||||
private boolean failOnEmptyArchive = false; | private boolean failOnEmptyArchive = false; | ||||
private boolean stripAbsolutePathSpec = false; | private boolean stripAbsolutePathSpec = false; | ||||
private boolean scanForUnicodeExtraFields = true; | |||||
private static final String NATIVE_ENCODING = "native-encoding"; | private static final String NATIVE_ENCODING = "native-encoding"; | ||||
@@ -166,7 +167,7 @@ public class Expand extends Task { | |||||
getLocation()); | getLocation()); | ||||
} | } | ||||
try { | try { | ||||
zf = new ZipFile(srcF, encoding); | |||||
zf = new ZipFile(srcF, encoding, scanForUnicodeExtraFields); | |||||
boolean empty = true; | boolean empty = true; | ||||
Enumeration e = zf.getEntries(); | Enumeration e = zf.getEntries(); | ||||
while (e.hasMoreElements()) { | while (e.hasMoreElements()) { | ||||
@@ -453,4 +454,12 @@ public class Expand extends Task { | |||||
stripAbsolutePathSpec = b; | stripAbsolutePathSpec = b; | ||||
} | } | ||||
/** | |||||
* Whether unicode extra fields will be used if present. | |||||
* | |||||
* <p>The value is stored in a field that is initialised to | |||||
* <code>true</code> and is passed to the <code>ZipFile</code> | |||||
* constructor when the archive is opened.</p> | |||||
* | |||||
* @param b whether to scan the archive for unicode extra fields | |||||
* @since Ant 1.8.0 | |||||
*/ | |||||
public void setScanForUnicodeExtraFields(boolean b) { | |||||
scanForUnicodeExtraFields = b; | |||||
} | |||||
} | } |
@@ -174,6 +174,20 @@ public class Zip extends MatchingTask { | |||||
*/ | */ | ||||
private boolean preserve0Permissions = false; | private boolean preserve0Permissions = false; | ||||
/** | |||||
* Whether to set the language encoding flag when creating the archive. | |||||
* | |||||
* @since Ant 1.8.0 | |||||
*/ | |||||
private boolean useLanguageEncodingFlag = true; | |||||
/** | |||||
* Whether to create unicode extra fields when creating the archive. | |||||
* | |||||
* @since Ant 1.8.0 | |||||
*/ | |||||
private boolean createUnicodeExtraFields = false; | |||||
/** | /** | ||||
* This is the name/location of where to | * This is the name/location of where to | ||||
* create the .zip file. | * create the .zip file. | ||||
@@ -452,6 +466,38 @@ public class Zip extends MatchingTask { | |||||
return preserve0Permissions; | return preserve0Permissions; | ||||
} | } | ||||
/** | |||||
* Whether to set the language encoding flag. | |||||
* | |||||
* <p>The stored value is handed to the <code>ZipOutputStream</code> | |||||
* when the archive is written; the backing field is initialised | |||||
* to <code>true</code>.</p> | |||||
* | |||||
* @param b whether the language encoding flag should be set | |||||
* @since Ant 1.8.0 | |||||
*/ | |||||
public void setUseLanguageEncodingFlag(boolean b) { | |||||
useLanguageEncodingFlag = b; | |||||
} | |||||
/** | |||||
* Whether the language encoding flag will be used. | |||||
* | |||||
* <p>NOTE(review): the method name is misspelled ("Enoding" instead | |||||
* of "Encoding"); it is part of the public interface, so it is left | |||||
* unchanged here.</p> | |||||
* | |||||
* @return the current value of the useLanguageEncodingFlag setting | |||||
* @since Ant 1.8.0 | |||||
*/ | |||||
public boolean getUseLanguageEnodingFlag() { | |||||
return useLanguageEncodingFlag; | |||||
} | |||||
/** | |||||
* Whether Unicode extra fields will be created. | |||||
* | |||||
* <p>The stored value is handed to the <code>ZipOutputStream</code> | |||||
* when the archive is written; the backing field is initialised | |||||
* to <code>false</code>.</p> | |||||
* | |||||
* @param b whether Unicode extra fields should be created | |||||
* @since Ant 1.8.0 | |||||
*/ | |||||
public void setCreateUnicodeExtraFields(boolean b) { | |||||
createUnicodeExtraFields = b; | |||||
} | |||||
/** | |||||
* Whether Unicode extra fields will be created. | |||||
* | |||||
* @return the current value of the createUnicodeExtraFields setting | |||||
* @since Ant 1.8.0 | |||||
*/ | |||||
public boolean getCreateUnicodeExtraFields() { | |||||
return createUnicodeExtraFields; | |||||
} | |||||
/** | /** | ||||
* validate and build | * validate and build | ||||
* @throws BuildException on error | * @throws BuildException on error | ||||
@@ -540,6 +586,8 @@ public class Zip extends MatchingTask { | |||||
zOut = new ZipOutputStream(zipFile); | zOut = new ZipOutputStream(zipFile); | ||||
zOut.setEncoding(encoding); | zOut.setEncoding(encoding); | ||||
zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag); | |||||
zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields); | |||||
zOut.setMethod(doCompress | zOut.setMethod(doCompress | ||||
? ZipOutputStream.DEFLATED : ZipOutputStream.STORED); | ? ZipOutputStream.DEFLATED : ZipOutputStream.STORED); | ||||
zOut.setLevel(level); | zOut.setLevel(level); | ||||
@@ -134,7 +134,7 @@ public class ZipFile { | |||||
/** | /** | ||||
* Opens the given file for reading, assuming the specified | * Opens the given file for reading, assuming the specified | ||||
* encoding for file names and ignoring unicode extra fields. | |||||
* encoding for file names, scanning unicode extra fields. | |||||
* | * | ||||
* @param name name of the archive. | * @param name name of the archive. | ||||
* @param encoding the encoding to use for file names | * @param encoding the encoding to use for file names | ||||
@@ -142,12 +142,12 @@ public class ZipFile { | |||||
* @throws IOException if an error occurs while reading the file. | * @throws IOException if an error occurs while reading the file. | ||||
*/ | */ | ||||
public ZipFile(String name, String encoding) throws IOException { | public ZipFile(String name, String encoding) throws IOException { | ||||
this(new File(name), encoding, false); | |||||
this(new File(name), encoding, true); | |||||
} | } | ||||
/** | /** | ||||
* Opens the given file for reading, assuming the specified | * Opens the given file for reading, assuming the specified | ||||
* encoding for file names and ignoring unicode extra fields. | |||||
* encoding for file names and scanning for unicode extra fields. | |||||
* | * | ||||
* @param f the archive. | * @param f the archive. | ||||
* @param encoding the encoding to use for file names, use null | * @param encoding the encoding to use for file names, use null | ||||
@@ -156,7 +156,7 @@ public class ZipFile { | |||||
* @throws IOException if an error occurs while reading the file. | * @throws IOException if an error occurs while reading the file. | ||||
*/ | */ | ||||
public ZipFile(File f, String encoding) throws IOException { | public ZipFile(File f, String encoding) throws IOException { | ||||
this(f, encoding, false); | |||||
this(f, encoding, true); | |||||
} | } | ||||
/** | /** | ||||