git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@750072 13f79535-47bb-0310-9956-ffa450edef68master
@@ -213,10 +213,22 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
<td valign="top">createUnicodeExtraFields</td> | |||
<td valign="top">Whether to create unicode extra fields to store | |||
the file names a second time inside the entry's metadata. | |||
<br>Possible values are "never", "always" and "not-encodeable" | |||
which will only add Unicode extra fields if the file name cannot | |||
be encoded using the specified encoding. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is "never"</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">fallbacktoUTF8</td> | |||
<td valign="top">Whether to use UTF-8 and the language encoding | |||
flag instead of the specified encoding if a file name cannot be | |||
encoded using the specified encoding. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td align="center" valign="top">No, default is false</td> | |||
</tr> | |||
</table> | |||
@@ -267,10 +267,22 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
<td valign="top">createUnicodeExtraFields</td> | |||
<td valign="top">Whether to create unicode extra fields to store | |||
the file names a second time inside the entry's metadata. | |||
<br>Possible values are "never", "always" and "not-encodeable" | |||
which will only add Unicode extra fields if the file name cannot | |||
be encoded using the specified encoding. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is "never"</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">fallbacktoUTF8</td> | |||
<td valign="top">Whether to use UTF-8 and the language encoding | |||
flag instead of the specified encoding if a file name cannot be | |||
encoded using the specified encoding. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td align="center" valign="top">No, default is false</td> | |||
</tr> | |||
</table> | |||
@@ -230,10 +230,22 @@ to a value other than its default, <code>"add"</code>.</b></p> | |||
<td valign="top">createUnicodeExtraFields</td> | |||
<td valign="top">Whether to create unicode extra fields to store | |||
the file names a second time inside the entry's metadata. | |||
<br>Possible values are "never", "always" and "not-encodeable" | |||
which will only add Unicode extra fields if the file name cannot | |||
be encoded using the specified encoding. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td valign="top" align="center">No, default is "never"</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">fallbacktoUTF8</td> | |||
<td valign="top">Whether to use UTF-8 and the language encoding | |||
flag instead of the specified encoding if a file name cannot be | |||
encoded using the specified encoding. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="zip.html#encoding">discussion in the | |||
zip task page</a></td> | |||
<td align="center" valign="top">No, default is false</td> | |||
</tr> | |||
</table> | |||
@@ -256,7 +256,19 @@ archive.</p> | |||
<td valign="top">createUnicodeExtraFields</td> | |||
<td valign="top">Whether to create unicode extra fields to store | |||
the file names a second time inside the entry's metadata. | |||
Defaults to false. <em>Since Ant 1.8.0</em>. | |||
<br>Possible values are "never", "always" and "not-encodeable" | |||
which will only add Unicode extra fields if the file name cannot | |||
be encoded using the specified encoding. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="#encoding">discussion below</a></td> | |||
<td align="center" valign="top">No, default is "never"</td> | |||
</tr> | |||
<tr> | |||
<td valign="top">fallbacktoUTF8</td> | |||
<td valign="top">Whether to use UTF-8 and the language encoding | |||
flag instead of the specified encoding if a file name cannot be | |||
encoded using the specified encoding. | |||
<em>Since Ant 1.8.0</em>. | |||
<br/>See also the <a href="#encoding">discussion below</a></td> | |||
<td align="center" valign="top">No, default is false</td> | |||
</tr> | |||
@@ -298,8 +310,15 @@ archive.</p> | |||
entry's metadata. Most archivers ignore these extra fields. The | |||
zip family of tasks support an | |||
option <code>createUnicodeExtraFields</code> since Ant 1.8.0 which | |||
makes Ant write these extra fields, it defaults to false since it | |||
creates a bigger archive.</p> | |||
makes Ant write these extra fields either for all entries ("always") | |||
or only those whose name cannot be encoded using the specified | |||
encoding ("not-encodeable"); it defaults to "never" since the extra | |||
fields create bigger archives.</p> | |||
<p>The fallbackToUTF8 attribute of zip can be used to create archives | |||
that use the specified encoding in the majority of cases but UTF-8 and | |||
the language encoding flag for filenames that cannot be encoded | |||
using the specified encoding.</p> | |||
<p>The unzip-task will recognize the unicode extra fields by default | |||
and read the file name information from them, unless you set the | |||
@@ -320,12 +339,13 @@ archive.</p> | |||
<li>7Zip writes CodePage 437 by default but uses UTF-8 and the | |||
language encoding flag when writing entries that cannot be encoded | |||
as CodePage 437. It recognizes the language encoding flag when | |||
reading and ignores the unicode extra fields.</li> | |||
as CodePage 437 (similar to the zip task with fallbacktoUTF8 set | |||
to true). It recognizes the language encoding flag when reading | |||
and ignores the unicode extra fields.</li> | |||
<li>WinZIP writes CodePage 437 and uses unicode extra fields by | |||
default. It recognizes the unicode extra field when reading and | |||
ignores the language encoding flag.</li> | |||
default. It recognizes the unicode extra field and the language | |||
encoding flag when reading.</li> | |||
<li>Windows' "compressed folder" feature doesn't recognize any flag | |||
or extra field and creates archives using the platforms default | |||
@@ -27,8 +27,10 @@ import java.io.InputStream; | |||
import java.io.OutputStream; | |||
import java.util.ArrayList; | |||
import java.util.Enumeration; | |||
import java.util.HashMap; | |||
import java.util.Hashtable; | |||
import java.util.Iterator; | |||
import java.util.Map; | |||
import java.util.Stack; | |||
import java.util.Vector; | |||
import java.util.zip.CRC32; | |||
@@ -182,11 +184,20 @@ public class Zip extends MatchingTask { | |||
private boolean useLanguageEncodingFlag = true; | |||
/** | |||
* Whether to set the language encoding flag when creating the archive. | |||
* Whether to add unicode extra fields. | |||
* | |||
* @since Ant 1.8.0 | |||
*/ | |||
private boolean createUnicodeExtraFields = false; | |||
private UnicodeExtraField createUnicodeExtraFields = | |||
UnicodeExtraField.NEVER; | |||
/** | |||
* Whether to fall back to UTF-8 if a name cannot be encoded using | |||
* the specified encoding. | |||
* | |||
* @since Ant 1.8.0 | |||
*/ | |||
private boolean fallBackToUTF8 = false; | |||
/** | |||
* This is the name/location of where to | |||
@@ -486,7 +497,7 @@ public class Zip extends MatchingTask { | |||
* Whether Unicode extra fields will be created. | |||
* @since Ant 1.8.0 | |||
*/ | |||
public void setCreateUnicodeExtraFields(boolean b) { | |||
public void setCreateUnicodeExtraFields(UnicodeExtraField b) { | |||
createUnicodeExtraFields = b; | |||
} | |||
@@ -494,10 +505,32 @@ public class Zip extends MatchingTask { | |||
* Whether Unicode extra fields will be created. | |||
* @since Ant 1.8.0 | |||
*/ | |||
public boolean getCreateUnicodeExtraFields() { | |||
public UnicodeExtraField getCreateUnicodeExtraFields() { | |||
return createUnicodeExtraFields; | |||
} | |||
/** | |||
* Whether to fall back to UTF-8 if a name cannot be encoded using | |||
* the specified encoding. | |||
* | |||
* <p>Defaults to false.</p> | |||
* | |||
* @since Ant 1.8.0 | |||
*/ | |||
public void setFallBackToUTF8(boolean b) { | |||
fallBackToUTF8 = b; | |||
} | |||
/** | |||
* Whether to fall back to UTF-8 if a name cannot be encoded using | |||
* the specified encoding. | |||
* | |||
* @since Ant 1.8.0 | |||
*/ | |||
public boolean getFallBackToUTF8() { | |||
return fallBackToUTF8; | |||
} | |||
/** | |||
* validate and build | |||
* @throws BuildException on error | |||
@@ -587,7 +620,9 @@ public class Zip extends MatchingTask { | |||
zOut.setEncoding(encoding); | |||
zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag); | |||
zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields); | |||
zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields. | |||
getPolicy()); | |||
zOut.setFallbackToUTF8(fallBackToUTF8); | |||
zOut.setMethod(doCompress | |||
? ZipOutputStream.DEFLATED : ZipOutputStream.STORED); | |||
zOut.setLevel(level); | |||
@@ -1881,4 +1916,45 @@ public class Zip extends MatchingTask { | |||
return true; | |||
} | |||
} | |||
/** | |||
* Policy for creation of Unicode extra fields: never, always or | |||
* not-encodeable. | |||
* | |||
* @since Ant 1.8.0 | |||
*/ | |||
public static final class UnicodeExtraField extends EnumeratedAttribute { | |||
private static final Map POLICIES = new HashMap(); | |||
private static final String NEVER_KEY = "never"; | |||
private static final String ALWAYS_KEY = "always"; | |||
private static final String N_E_KEY = "not-encodeable"; | |||
static { | |||
POLICIES.put(NEVER_KEY, | |||
ZipOutputStream.UnicodeExtraFieldPolicy.NEVER); | |||
POLICIES.put(ALWAYS_KEY, | |||
ZipOutputStream.UnicodeExtraFieldPolicy.ALWAYS); | |||
POLICIES.put(N_E_KEY, | |||
ZipOutputStream.UnicodeExtraFieldPolicy | |||
.NOT_ENCODEABLE); | |||
} | |||
public String[] getValues() { | |||
return new String[] {NEVER_KEY, ALWAYS_KEY, N_E_KEY}; | |||
} | |||
public static final UnicodeExtraField NEVER = | |||
new UnicodeExtraField(NEVER_KEY); | |||
private UnicodeExtraField(String name) { | |||
setValue(name); | |||
} | |||
public UnicodeExtraField() { | |||
} | |||
public ZipOutputStream.UnicodeExtraFieldPolicy getPolicy() { | |||
return (ZipOutputStream.UnicodeExtraFieldPolicy) | |||
POLICIES.get(getValue()); | |||
} | |||
} | |||
} |
@@ -34,7 +34,7 @@ import java.nio.ByteBuffer; | |||
* given name can be safely encoded or not.</p> | |||
* | |||
* <p>This implementation acts as a last resort implementation, when | |||
* neither {@see Simple8BitZipEnoding} nor {@see NioZipEncoding} is | |||
* neither {@link Simple8BitZipEncoding} nor {@link NioZipEncoding} is | |||
* available.</p> | |||
* | |||
* <p>The methods of this class are reentrant.</p> | |||
@@ -77,7 +77,7 @@ class Simple8BitZipEncoding implements ZipEncoding { | |||
private final char[] highChars; | |||
/** | |||
* A list of {@see Simple8BitChar} objects sorted by the unicode | |||
* A list of {@link Simple8BitChar} objects sorted by the unicode | |||
* field. This list is used to binary search reverse mapping of | |||
* unicode characters with a character code greater than 127. | |||
*/ | |||
@@ -45,7 +45,7 @@ public class UnicodeCommentExtraField extends AbstractUnicodeExtraField { | |||
* Assemble a unicode comment extension from the comment given as | |||
* text as well as the encoded bytes actually written to the archive. | |||
* | |||
* @param name The file name | |||
* @param text The comment text | |||
* @param bytes the bytes actually written to the archive | |||
* @param off The offset of the encoded comment in <code>bytes</code>. | |||
* @param len The length of the encoded comment or comment in | |||
@@ -45,7 +45,7 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField { | |||
* Assemble a unicode path extension from the name given as | |||
* text as well as the encoded bytes actually written to the archive. | |||
* | |||
* @param name The file name | |||
* @param text The file name | |||
* @param bytes the bytes actually written to the archive | |||
* @param off The offset of the encoded filename in <code>bytes</code>. | |||
* @param len The length of the encoded filename or comment in | |||
@@ -51,7 +51,7 @@ abstract class ZipEncodingHelper { | |||
} | |||
/** | |||
* @return The associated {@see Simple8BitZipEncoding}, which | |||
* @return The associated {@link Simple8BitZipEncoding}, which | |||
* is instantiated if not done so far. | |||
*/ | |||
public synchronized Simple8BitZipEncoding getEncoding() { | |||
@@ -150,7 +150,7 @@ public final class ZipLong implements Cloneable { | |||
public Object clone() { | |||
try { | |||
return (ZipLong) super.clone(); | |||
return super.clone(); | |||
} catch (CloneNotSupportedException cnfe) { | |||
// impossible | |||
throw new RuntimeException(cnfe); | |||
@@ -269,10 +269,16 @@ public class ZipOutputStream extends FilterOutputStream { | |||
*/ | |||
private boolean useEFS = true; | |||
/** | |||
* Whether to encode non-encodable file names as UTF-8. | |||
*/ | |||
private boolean fallbackToUTF8 = false; | |||
/** | |||
* whether to create UnicodePathExtraField-s for each entry. | |||
*/ | |||
private boolean createUnicodeExtraFields = false; | |||
private UnicodeExtraFieldPolicy createUnicodeExtraFields = | |||
UnicodeExtraFieldPolicy.NEVER; | |||
/** | |||
* Creates a new ZIP OutputStream filtering the underlying stream. | |||
@@ -360,14 +366,24 @@ public class ZipOutputStream extends FilterOutputStream { | |||
} | |||
/** | |||
* Whether to create Unicode Extra Fields for all entries. | |||
* Whether to create Unicode Extra Fields. | |||
* | |||
* <p>Defaults to false.</p> | |||
* <p>Defaults to NEVER.</p> | |||
*/ | |||
public void setCreateUnicodeExtraFields(boolean b) { | |||
public void setCreateUnicodeExtraFields(UnicodeExtraFieldPolicy b) { | |||
createUnicodeExtraFields = b; | |||
} | |||
/** | |||
* Whether to fall back to UTF-8 and the language encoding flag if | |||
* the file name cannot be encoded using the specified encoding. | |||
* | |||
* <p>Defaults to false.</p> | |||
*/ | |||
public void setFallbackToUTF8(boolean b) { | |||
fallbackToUTF8 = b; | |||
} | |||
/** | |||
* Finishes writing the contents and closes this as well as the | |||
* underlying stream. | |||
@@ -665,31 +681,38 @@ public class ZipOutputStream extends FilterOutputStream { | |||
*/ | |||
protected void writeLocalFileHeader(ZipEntry ze) throws IOException { | |||
boolean encodable = this.zipEncoding.canEncode(ze.getName()); | |||
ByteBuffer name = this.zipEncoding.encode(ze.getName()); | |||
boolean encodable = zipEncoding.canEncode(ze.getName()); | |||
ByteBuffer name; | |||
if (!encodable && fallbackToUTF8) { | |||
name = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(ze.getName()); | |||
} else { | |||
name = zipEncoding.encode(ze.getName()); | |||
} | |||
if (createUnicodeExtraFields) { | |||
if (createUnicodeExtraFields != UnicodeExtraFieldPolicy.NEVER) { | |||
/* if (!encodable) { -- FIXME decide what to*/ | |||
if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS | |||
|| !encodable) { | |||
ze.addExtraField(new UnicodePathExtraField(ze.getName(), | |||
name.array(), | |||
name.arrayOffset(), | |||
name.limit())); | |||
/* } */ | |||
} | |||
String comm = ze.getComment(); | |||
if (comm != null && !"".equals(comm)) { | |||
boolean commentEncodable = this.zipEncoding.canEncode(comm); | |||
/* if (!commentEncodable) { -- FIXME decide what to*/ | |||
if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS | |||
|| !commentEncodable) { | |||
ByteBuffer commentB = this.zipEncoding.encode(comm); | |||
ze.addExtraField(new UnicodeCommentExtraField(comm, | |||
commentB.array(), | |||
commentB.arrayOffset(), | |||
commentB.limit()) | |||
); | |||
/* } */ | |||
} | |||
} | |||
} | |||
@@ -701,7 +724,9 @@ public class ZipOutputStream extends FilterOutputStream { | |||
//store method in local variable to prevent multiple method calls | |||
final int zipMethod = ze.getMethod(); | |||
writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod); | |||
writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod, | |||
!encodable | |||
&& fallbackToUTF8); | |||
written += WORD; | |||
// compression method | |||
@@ -786,7 +811,10 @@ public class ZipOutputStream extends FilterOutputStream { | |||
written += SHORT; | |||
final int zipMethod = ze.getMethod(); | |||
writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod); | |||
final boolean encodable = zipEncoding.canEncode(ze.getName()); | |||
writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod, | |||
!encodable | |||
&& fallbackToUTF8); | |||
written += WORD; | |||
// compression method | |||
@@ -808,7 +836,12 @@ public class ZipOutputStream extends FilterOutputStream { | |||
// CheckStyle:MagicNumber ON | |||
// file name length | |||
ByteBuffer name = this.zipEncoding.encode(ze.getName()); | |||
ByteBuffer name; | |||
if (!encodable && fallbackToUTF8) { | |||
name = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(ze.getName()); | |||
} else { | |||
name = zipEncoding.encode(ze.getName()); | |||
} | |||
writeOut(ZipShort.getBytes(name.limit())); | |||
written += SHORT; | |||
@@ -822,7 +855,12 @@ public class ZipOutputStream extends FilterOutputStream { | |||
if (comm == null) { | |||
comm = ""; | |||
} | |||
ByteBuffer commentB = this.zipEncoding.encode(comm); | |||
ByteBuffer commentB; | |||
if (!encodable && fallbackToUTF8) { | |||
commentB = ZipEncodingHelper.UTF8_ZIP_ENCODING.encode(comm); | |||
} else { | |||
commentB = zipEncoding.encode(comm); | |||
} | |||
writeOut(ZipShort.getBytes(commentB.limit())); | |||
written += SHORT; | |||
@@ -1000,12 +1038,14 @@ public class ZipOutputStream extends FilterOutputStream { | |||
} | |||
private void writeVersionNeededToExtractAndGeneralPurposeBits(final int | |||
zipMethod) | |||
zipMethod, | |||
final boolean | |||
utfFallback) | |||
throws IOException { | |||
// CheckStyle:MagicNumber OFF | |||
int versionNeededToExtract = 10; | |||
int generalPurposeFlag = useEFS ? EFS_FLAG : 0; | |||
int generalPurposeFlag = (useEFS || utfFallback) ? EFS_FLAG : 0; | |||
if (zipMethod == DEFLATED && raf == null) { | |||
// requires version 2 as we are going to store length info | |||
// in the data descriptor | |||
@@ -1020,4 +1060,35 @@ public class ZipOutputStream extends FilterOutputStream { | |||
// general purpose bit flag | |||
writeOut(ZipShort.getBytes(generalPurposeFlag)); | |||
} | |||
/** | |||
* enum that represents the possible policies for creating Unicode | |||
* extra fields. | |||
*/ | |||
public static final class UnicodeExtraFieldPolicy { | |||
/** | |||
* Always create Unicode extra fields. | |||
*/ | |||
public static final UnicodeExtraFieldPolicy ALWAYS = | |||
new UnicodeExtraFieldPolicy("always"); | |||
/** | |||
* Never create Unicode extra fields. | |||
*/ | |||
public static final UnicodeExtraFieldPolicy NEVER = | |||
new UnicodeExtraFieldPolicy("never"); | |||
/** | |||
* Create Unicode extra fields for filenames that cannot be | |||
* encoded using the specified encoding. | |||
*/ | |||
public static final UnicodeExtraFieldPolicy NOT_ENCODEABLE = | |||
new UnicodeExtraFieldPolicy("not encodeable"); | |||
private final String name; | |||
private UnicodeExtraFieldPolicy(String n) { | |||
name = n; | |||
} | |||
public String toString() { | |||
return name; | |||
} | |||
} | |||
} |
@@ -136,7 +136,7 @@ public final class ZipShort implements Cloneable { | |||
public Object clone() { | |||
try { | |||
return (ZipShort) super.clone(); | |||
return super.clone(); | |||
} catch (CloneNotSupportedException cnfe) { | |||
// impossible | |||
throw new RuntimeException(cnfe); | |||
@@ -122,7 +122,11 @@ public class UTF8ZipFilesTest extends TestCase { | |||
zos = new ZipOutputStream(file); | |||
zos.setEncoding(encoding); | |||
zos.setUseLanguageEncodingFlag(withEFS); | |||
zos.setCreateUnicodeExtraFields(!withExplicitUnicodeExtra); | |||
zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ? | |||
ZipOutputStream | |||
.UnicodeExtraFieldPolicy.NEVER | |||
: ZipOutputStream | |||
.UnicodeExtraFieldPolicy.ALWAYS); | |||
ZipEntry ze = new ZipEntry(OIL_BARREL_TXT); | |||
if (withExplicitUnicodeExtra | |||