git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748593 13f79535-47bb-0310-9956-ffa450edef68master
@@ -702,6 +702,10 @@ Other changes: | |||
* CBZip2OutputStream now has a finish method separate from close. | |||
Bugzilla Report 42713. | |||
* the <zip> and <unzip> families of tasks have new options to deal with | |||
file name and comment encoding. Please see the zip tasks' | |||
documentation for details. | |||
Changes from Ant 1.7.0 TO Ant 1.7.1 | |||
============================================= | |||
@@ -83,7 +83,9 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
<td valign="top">The character encoding to use for filenames | |||
inside the archive. Defaults to UTF8. <strong>It is not | |||
recommended to change this value as the created archive will most | |||
likely be unreadable for Java otherwise.</strong></td> | |||
likely be unreadable for Java otherwise.</strong> | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td align="center" valign="top">No</td> | |||
</tr> | |||
<tr> | |||
@@ -197,6 +199,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
</td> | |||
<td valign="top" align="center">No, default is false</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">useLanguageEncodingFlag</td> | |||
<td valign="top">Whether to set the language encoding flag if the | |||
encoding is UTF-8. This setting doesn't have any effect if the | |||
encoding is not UTF-8. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is true</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">createUnicodeExtraFields</td> | |||
<td valign="top">Whether to create unicode extra fields to store | |||
the file names a second time inside the entry's metadata. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is false</td> | |||
</tr> | |||
</table> | |||
<h3>Nested elements</h3> | |||
@@ -125,8 +125,10 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
<td valign="top">encoding</td> | |||
<td valign="top">The character encoding to use for filenames | |||
inside the archive. Defaults to UTF8. <strong>It is not | |||
recommended to change this value as the created archive will most | |||
likely be unreadable for Java otherwise.</strong></td> | |||
recommended to change this value as the created archive will | |||
most likely be unreadable for Java otherwise.</strong> | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td align="center" valign="top">No</td> | |||
</tr> | |||
<tr> | |||
@@ -251,6 +253,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
</td> | |||
<td valign="top" align="center">No, default is false</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">useLanguageEncodingFlag</td> | |||
<td valign="top">Whether to set the language encoding flag if the | |||
encoding is UTF-8. This setting doesn't have any effect if the | |||
encoding is not UTF-8. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is true</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">createUnicodeExtraFields</td> | |||
<td valign="top">Whether to create unicode extra fields to store | |||
the file names a second time inside the entry's metadata. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is false</td> | |||
</tr> | |||
</table> | |||
<h3>Nested elements</h3> | |||
@@ -107,7 +107,9 @@ archive.</p> | |||
href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.<br> | |||
Defaults to "UTF8", use the magic value | |||
<code>native-encoding</code> for the platform's default character | |||
encoding.</td> | |||
encoding. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td align="center" valign="top">No</td> | |||
</tr> | |||
<tr> | |||
@@ -125,6 +127,16 @@ archive.</p> | |||
any). <em>since Ant 1.8.0</em></td> | |||
<td valign="top" align="center">No, defaults to false</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">scanForUnicodeExtraFields</td> | |||
<td valign="top"><b>Note:</b> This attribute is not available for | |||
the <code>untar</code> task.<br> | |||
If the archive contains unicode extra fields then use them to set | |||
the file names, ignoring the specified encoding. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td align="center" valign="top">No, defaults to true</td> | |||
</tr> | |||
</table> | |||
<h3>Examples</h3> | |||
<pre> | |||
@@ -116,7 +116,9 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
<td valign="top">The character encoding to use for filenames | |||
inside the archive. Defaults to UTF8. <strong>It is not | |||
recommended to change this value as the created archive will most | |||
likely be unreadable for Java otherwise.</strong></td> | |||
likely be unreadable for Java otherwise.</strong> | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td align="center" valign="top">No</td> | |||
</tr> | |||
<tr> | |||
@@ -214,6 +216,25 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
</td> | |||
<td valign="top" align="center">No, default is false</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">useLanguageEncodingFlag</td> | |||
<td valign="top">Whether to set the language encoding flag if the | |||
encoding is UTF-8. This setting doesn't have any effect if the | |||
encoding is not UTF-8. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is true</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">createUnicodeExtraFields</td> | |||
<td valign="top">Whether to create unicode extra fields to store | |||
the file names a second time inside the entry's metadata. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is false</td> | |||
</tr> | |||
</table> | |||
<h3>Nested elements</h3> | |||
@@ -74,7 +74,8 @@ for filenames - this is consistent with the command line ZIP tools, | |||
but causes problems if you try to open them from within Java and your | |||
filenames contain non US-ASCII characters. Use the encoding attribute | |||
and set it to UTF8 to create zip files that can safely be read by | |||
Java.</p> | |||
Java. For a more complete discussion, | |||
see <a href="#encoding">below</a>.</p> | |||
<p>Starting with Ant 1.5.2, <code><zip></code> can store Unix permissions | |||
inside the archive (see description of the filemode and dirmode | |||
@@ -149,7 +150,8 @@ archive.</p> | |||
<td valign="top">The character encoding to use for filenames | |||
inside the zip file. For a list of possible values see <a | |||
href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. | |||
Defaults to the platform's default character encoding.</td> | |||
Defaults to the platform's default character encoding. | |||
<br/>See also the <a href="#encoding">discussion below</a></td> | |||
<td align="center" valign="top">No</td> | |||
</tr> | |||
<tr> | |||
@@ -241,7 +243,127 @@ archive.</p> | |||
</td> | |||
<td valign="top" align="center">No, default is false</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">useLanguageEncodingFlag</td> | |||
<td valign="top">Whether to set the language encoding flag if the | |||
encoding is UTF-8. This setting doesn't have any effect if the | |||
encoding is not UTF-8. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="#encoding">discussion below</a></td> | |||
<td align="center" valign="top">No, default is true</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">createUnicodeExtraFields</td> | |||
<td valign="top">Whether to create unicode extra fields to store | |||
the file names a second time inside the entry's metadata. | |||
Defaults to false. <em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="#encoding">discussion below</a></td> | |||
<td align="center" valign="top">No, default is false</td> | |||
</tr> | |||
</table> | |||
<h3><a name="encoding">Encoding of File Names</a></h3> | |||
<p>Traditionally the ZIP archive format uses CodePage 437 as the encoding | |||
for file names, which is not sufficient for many international | |||
character sets.</p> | |||
<p>Over time different archivers have chosen different ways to work | |||
around the limitation - the <code>java.util.zip</code> package | |||
simply uses UTF-8 as its encoding, for example.</p> | |||
<p>Ant has been offering the encoding attribute of the zip and unzip | |||
task as a way to explicitly specify the encoding to use (or expect) | |||
since Ant 1.4. It defaults to the platform's default encoding for | |||
zip and UTF-8 for jar and other jar-like tasks (war, ear, ...) as | |||
well as the unzip family of tasks.</p> | |||
<p>More recent versions of the ZIP specification introduce something | |||
called the "language encoding flag" which can be used to | |||
signal that a file name has been encoded using UTF-8. Starting with | |||
Ant 1.8.0 all zip-/jar- and similar archives written by Ant will set | |||
this flag, if the encoding has been set to UTF-8. Our | |||
interoperability tests with existing archivers didn't show any ill | |||
effects (in fact, most archivers ignore the flag to date), but you | |||
can turn off the "language encoding flag" by setting the attribute | |||
<code>useLanguageEncodingFlag</code> to <code>false</code> on the | |||
zip-task if you should encounter problems.</p> | |||
<p>The unzip task (and similar tasks) will recognize the language | |||
encoding flag and ignore the encoding set on the task if the flag | |||
has been found.</p> | |||
<p>The InfoZIP developers have introduced new ZIP extra fields that | |||
can be used to add an additional UTF-8 encoded file name to the | |||
entry's metadata. Most archivers ignore these extra fields. The | |||
zip family of tasks supports an | |||
option <code>createUnicodeExtraFields</code> since Ant 1.8.0 which | |||
makes Ant write these extra fields; it defaults to false since it | |||
creates a bigger archive.</p> | |||
<p>The unzip-task will recognize the unicode extra fields by default | |||
and read the file name information from them, unless you set the | |||
optional attribute <code>scanForUnicodeExtraFields</code> to | |||
false.</p> | |||
<h4>Recommendations for Interoperability</h4> | |||
<p>The optimal setting of flags depends on the archivers you expect as | |||
consumers/producers of the ZIP archives. Below are some test | |||
results which may be superseded by later versions of each | |||
tool.</p> | |||
<ul> | |||
<li>The java.util.zip package used by the jar executable or to read | |||
jars from your CLASSPATH reads and writes UTF-8 names; it doesn't | |||
set or recognize any flags or unicode extra fields.</li> | |||
<li>7Zip writes CodePage 437 by default but uses UTF-8 and the | |||
language encoding flag when writing entries that cannot be encoded | |||
as CodePage 437. It recognizes the language encoding flag when | |||
reading and ignores the unicode extra fields.</li> | |||
<li>WinZIP writes CodePage 437 and uses unicode extra fields by | |||
default. It recognizes the unicode extra field when reading and | |||
ignores the language encoding flag.</li> | |||
<li>Windows' "compressed folder" feature doesn't recognize any flag | |||
or extra field and creates archives using the platform's default | |||
encoding - and expects archives to be in that encoding when reading | |||
them.</li> | |||
<li>InfoZIP based tools can recognize and write both; this is a | |||
compile-time option and depends on the platform so your mileage | |||
may vary.</li> | |||
<li>PKWARE zip tools recognize both and prefer the language encoding | |||
flag. They create archives using CodePage 437 if possible and UTF-8 | |||
plus the language encoding flag for file names that cannot be | |||
encoded as CodePage 437.</li> | |||
</ul> | |||
<p>So, what to do?</p> | |||
<p>If you are creating jars, then java.util.zip is your main | |||
consumer. We recommend you set the encoding to UTF-8 and keep the | |||
language encoding flag enabled. The flag won't help or hurt | |||
java.util.zip but archivers that support it will show the correct | |||
file names.</p> | |||
<p>For maximum interop it is probably best to set the encoding to | |||
UTF-8, enable the language encoding flag and create unicode extra | |||
fields when writing ZIPs. Such archives should be extracted | |||
correctly by java.util.zip, 7Zip, WinZIP, PKWARE tools and most | |||
likely InfoZIP tools. They will be unusable with Windows' | |||
"compressed folders" feature and bigger than archives without the | |||
unicode extra fields, though.</p> | |||
<p>If Windows' "compressed folders" is your primary consumer, then | |||
your best option is to explicitly set the encoding to the target | |||
platform. You may want to enable creation of unicode extra fields | |||
so the tools that support them will extract the file names | |||
correctly.</p> | |||
<h3>Parameters specified as nested elements</h3> | |||
<h4>any resource collection</h4> | |||
@@ -68,6 +68,7 @@ public class Expand extends Task { | |||
private boolean resourcesSpecified = false; | |||
private boolean failOnEmptyArchive = false; | |||
private boolean stripAbsolutePathSpec = false; | |||
private boolean scanForUnicodeExtraFields = true; | |||
private static final String NATIVE_ENCODING = "native-encoding"; | |||
@@ -166,7 +167,7 @@ public class Expand extends Task { | |||
getLocation()); | |||
} | |||
try { | |||
zf = new ZipFile(srcF, encoding); | |||
zf = new ZipFile(srcF, encoding, scanForUnicodeExtraFields); | |||
boolean empty = true; | |||
Enumeration e = zf.getEntries(); | |||
while (e.hasMoreElements()) { | |||
@@ -453,4 +454,12 @@ public class Expand extends Task { | |||
stripAbsolutePathSpec = b; | |||
} | |||
/** | |||
* Whether unicode extra fields will be used if present. | |||
* | |||
* @param b the new value of the scanForUnicodeExtraFields setting, | |||
*          which is passed to the ZipFile constructor when the archive | |||
*          is opened | |||
* @since Ant 1.8.0 | |||
*/ | |||
public void setScanForUnicodeExtraFields(boolean b) { | |||
scanForUnicodeExtraFields = b; | |||
} | |||
} |
@@ -174,6 +174,20 @@ public class Zip extends MatchingTask { | |||
*/ | |||
private boolean preserve0Permissions = false; | |||
/** | |||
* Whether to set the language encoding flag when creating the archive. | |||
* | |||
* @since Ant 1.8.0 | |||
*/ | |||
private boolean useLanguageEncodingFlag = true; | |||
/** | |||
* Whether to create Unicode extra fields when creating the archive. | |||
* | |||
* @since Ant 1.8.0 | |||
*/ | |||
private boolean createUnicodeExtraFields = false; | |||
/** | |||
* This is the name/location of where to | |||
* create the .zip file. | |||
@@ -452,6 +466,38 @@ public class Zip extends MatchingTask { | |||
return preserve0Permissions; | |||
} | |||
/** | |||
* Whether to set the language encoding flag. | |||
* | |||
* @param b the new value of the useLanguageEncodingFlag setting, | |||
*          which is handed to the ZipOutputStream when the archive | |||
*          is written | |||
* @since Ant 1.8.0 | |||
*/ | |||
public void setUseLanguageEncodingFlag(boolean b) { | |||
useLanguageEncodingFlag = b; | |||
} | |||
/** | |||
* Whether the language encoding flag will be used. | |||
* | |||
* NOTE(review): the method name is spelled "Enoding" (missing "c") | |||
* while the field and setter use "Encoding"; this is a public | |||
* method, so a rename would need a compatible alias. | |||
* | |||
* @return the current value of the useLanguageEncodingFlag setting | |||
* @since Ant 1.8.0 | |||
*/ | |||
public boolean getUseLanguageEnodingFlag() { | |||
return useLanguageEncodingFlag; | |||
} | |||
/** | |||
* Whether Unicode extra fields will be created. | |||
* | |||
* @param b the new value of the createUnicodeExtraFields setting, | |||
*          which is handed to the ZipOutputStream when the archive | |||
*          is written | |||
* @since Ant 1.8.0 | |||
*/ | |||
public void setCreateUnicodeExtraFields(boolean b) { | |||
createUnicodeExtraFields = b; | |||
} | |||
/** | |||
* Whether Unicode extra fields will be created. | |||
* | |||
* @return the current value of the createUnicodeExtraFields setting | |||
* @since Ant 1.8.0 | |||
*/ | |||
public boolean getCreateUnicodeExtraFields() { | |||
return createUnicodeExtraFields; | |||
} | |||
/** | |||
* validate and build | |||
* @throws BuildException on error | |||
@@ -540,6 +586,8 @@ public class Zip extends MatchingTask { | |||
zOut = new ZipOutputStream(zipFile); | |||
zOut.setEncoding(encoding); | |||
zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag); | |||
zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields); | |||
zOut.setMethod(doCompress | |||
? ZipOutputStream.DEFLATED : ZipOutputStream.STORED); | |||
zOut.setLevel(level); | |||
@@ -134,7 +134,7 @@ public class ZipFile { | |||
/** | |||
* Opens the given file for reading, assuming the specified | |||
* encoding for file names and ignoring unicode extra fields. | |||
* encoding for file names, scanning unicode extra fields. | |||
* | |||
* @param name name of the archive. | |||
* @param encoding the encoding to use for file names | |||
@@ -142,12 +142,12 @@ public class ZipFile { | |||
* @throws IOException if an error occurs while reading the file. | |||
*/ | |||
public ZipFile(String name, String encoding) throws IOException { | |||
this(new File(name), encoding, false); | |||
this(new File(name), encoding, true); | |||
} | |||
/** | |||
* Opens the given file for reading, assuming the specified | |||
* encoding for file names and ignoring unicode extra fields. | |||
* encoding for file names and scanning for unicode extra fields. | |||
* | |||
* @param f the archive. | |||
* @param encoding the encoding to use for file names, use null | |||
@@ -156,7 +156,7 @@ public class ZipFile { | |||
* @throws IOException if an error occurs while reading the file. | |||
*/ | |||
public ZipFile(File f, String encoding) throws IOException { | |||
this(f, encoding, false); | |||
this(f, encoding, true); | |||
} | |||
/** | |||