git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748147 13f79535-47bb-0310-9956-ffa450edef68 master
@@ -34,23 +34,32 @@ public abstract class AbstractUnicodeExtraField implements ZipExtraField { | |||||
} | } | ||||
/** | /** | ||||
* Assemble as unicode path extension form the name and encoding | |||||
* of the orginal zip entry. | |||||
* Assemble as unicode extension from the name/comment and | |||||
* encoding of the original zip entry. | |||||
* | * | ||||
* @param name The file name or comment. | |||||
* @param text The file name or comment. | |||||
* @param zipEncoding The encoding of the filenames in the zip | * @param zipEncoding The encoding of the filenames in the zip | ||||
* file, usually <code>"CP437"</code>. | * file, usually <code>"CP437"</code>. | ||||
*/ | */ | ||||
protected AbstractUnicodeExtraField(String name, String zipEncoding) { | |||||
byte[] filename = ZipEncodingHelper.encodeName(name, zipEncoding); | |||||
protected AbstractUnicodeExtraField(String text, String zipEncoding) { | |||||
this(text, ZipEncodingHelper.encodeName(text, zipEncoding)); | |||||
} | |||||
/** | |||||
* Assemble as unicode extension from the name/comment given as | |||||
* text as well as the encoded bytes actually written to the archive. | |||||
* | |||||
* @param text The file name or comment. | |||||
* @param bytes The encoded form of the text as written to the | |||||
* archive; its CRC32 is stored in the field. | |||||
*/ | |||||
protected AbstractUnicodeExtraField(String text, byte[] bytes) { | |||||
CRC32 crc32 = new CRC32(); | CRC32 crc32 = new CRC32(); | ||||
crc32.update(filename); | |||||
crc32.update(bytes); | |||||
nameCRC32 = crc32.getValue(); | nameCRC32 = crc32.getValue(); | ||||
try { | try { | ||||
unicodeName = name.getBytes("UTF-8"); | |||||
unicodeName = text.getBytes("UTF-8"); | |||||
} catch (UnsupportedEncodingException e) { | } catch (UnsupportedEncodingException e) { | ||||
throw new RuntimeException("FATAL: UTF-8 encoding not supported.", | throw new RuntimeException("FATAL: UTF-8 encoding not supported.", | ||||
e); | e); | ||||
@@ -42,15 +42,26 @@ public class UnicodeCommentExtraField extends AbstractUnicodeExtraField { | |||||
} | } | ||||
/** | /** | ||||
* Assemble as unicode comment extension form the comment and | |||||
* Assemble as unicode comment extension from the comment and | |||||
* encoding of the original zip entry. | * encoding of the original zip entry. | ||||
* | * | ||||
* @param name The file name | |||||
* @param comment The file comment | |||||
* @param zipEncoding The encoding of the comment in the zip file, | * @param zipEncoding The encoding of the comment in the zip file, | ||||
* usually <code>"CP437"</code>. | * usually <code>"CP437"</code>. | ||||
*/ | */ | ||||
public UnicodeCommentExtraField(String name, String zipEncoding) { | |||||
super(name, zipEncoding); | |||||
public UnicodeCommentExtraField(String comment, String zipEncoding) { | |||||
super(comment, zipEncoding); | |||||
} | |||||
/** | |||||
* Assemble as unicode comment extension from the comment given as | |||||
* text as well as the bytes actually written to the archive. | |||||
* | |||||
* @param comment The file comment | |||||
* @param bytes the bytes actually written to the archive | |||||
*/ | |||||
public UnicodeCommentExtraField(String comment, byte[] bytes) { | |||||
super(comment, bytes); | |||||
} | } | ||||
public ZipShort getHeaderId() { | public ZipShort getHeaderId() { | ||||
@@ -42,7 +42,7 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField { | |||||
} | } | ||||
/** | /** | ||||
* Assemble as unicode path extension form the name and encoding | |||||
* Assemble as unicode path extension from the name and encoding | |||||
* of the original zip entry. | * of the original zip entry. | ||||
* | * | ||||
* @param name The file name | * @param name The file name | ||||
@@ -53,6 +53,17 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField { | |||||
super(name, zipEncoding); | super(name, zipEncoding); | ||||
} | } | ||||
/** | |||||
* Assemble as unicode path extension from the name given as | |||||
* text as well as the bytes actually written to the archive. | |||||
* | |||||
* @param name The file name | |||||
* @param bytes the bytes actually written to the archive | |||||
*/ | |||||
public UnicodePathExtraField(String name, byte[] bytes) { | |||||
super(name, bytes); | |||||
} | |||||
public ZipShort getHeaderId() { | public ZipShort getHeaderId() { | ||||
return UPATH_ID; | return UPATH_ID; | ||||
} | } | ||||
@@ -75,17 +75,23 @@ abstract class ZipEncodingHelper { | |||||
* </pre> | * </pre> | ||||
* | * | ||||
* @param name The filename or comment with possible non-ASCII | * @param name The filename or comment with possible non-ASCII | ||||
* unicode characters. | |||||
* unicode characters. Must not be null. | |||||
* @param encoding A valid encoding name. The standard zip | * @param encoding A valid encoding name. The standard zip | ||||
* encoding is <code>"CP437"</code>, | * encoding is <code>"CP437"</code>, | ||||
* <code>"UTF-8"</code> is supported in ZIP file | * <code>"UTF-8"</code> is supported in ZIP file | ||||
* version <code>6.3</code> or later. | |||||
* version <code>6.3</code> or later. If null, | |||||
* will use the platform's {@link | |||||
* java.lang.String#getBytes default encoding}. | |||||
* @return A byte array containing the mapped file | * @return A byte array containing the mapped file | ||||
* name. Unmappable characters or malformed character | * name. Unmappable characters or malformed character | ||||
* sequences are mapped to a sequence of utf-16 words | * sequences are mapped to a sequence of utf-16 words | ||||
* encoded in the format <code>%Uxxxx</code>. | * encoded in the format <code>%Uxxxx</code>. | ||||
*/ | */ | ||||
static final byte[] encodeName(String name, String encoding) { | static final byte[] encodeName(String name, String encoding) { | ||||
if (encoding == null) { | |||||
return name.getBytes(); | |||||
} | |||||
Charset cs = Charset.forName(encoding); | Charset cs = Charset.forName(encoding); | ||||
CharsetEncoder enc = cs.newEncoder(); | CharsetEncoder enc = cs.newEncoder(); | ||||
@@ -178,8 +184,12 @@ abstract class ZipEncodingHelper { | |||||
* <code>"UTF-8"</code> is supported in ZIP file | * <code>"UTF-8"</code> is supported in ZIP file | ||||
* version <code>6.3</code> or later. | * version <code>6.3</code> or later. | ||||
*/ | */ | ||||
static final String decodeName(byte[] name, String encoding) { | |||||
static final String decodeName(byte[] name, String encoding) | |||||
throws java.nio.charset.CharacterCodingException { | |||||
Charset cs = Charset.forName(encoding); | Charset cs = Charset.forName(encoding); | ||||
return cs.decode(ByteBuffer.wrap(name)).toString(); | |||||
return cs.newDecoder() | |||||
.onMalformedInput(CodingErrorAction.REPORT) | |||||
.onUnmappableCharacter(CodingErrorAction.REPORT) | |||||
.decode(ByteBuffer.wrap(name)).toString(); | |||||
} | } | ||||
} | } |
@@ -263,6 +263,18 @@ public class ZipEntry extends java.util.zip.ZipEntry implements Cloneable { | |||||
setExtra(); | setExtra(); | ||||
} | } | ||||
/** | |||||
* Looks up an extra field by its header id. | |||||
* | |||||
* @return null if no such field exists. | |||||
*/ | |||||
public ZipExtraField getExtraField(ZipShort type) { | |||||
if (extraFields != null) { | |||||
return (ZipExtraField) extraFields.get(type); | |||||
} | |||||
return null; | |||||
} | |||||
/** | /** | ||||
* Throws an Exception if extra data cannot be parsed into extra fields. | * Throws an Exception if extra data cannot be parsed into extra fields. | ||||
* @param extra an array of bytes to be parsed into extra fields | * @param extra an array of bytes to be parsed into extra fields | ||||
@@ -23,12 +23,14 @@ import java.io.IOException; | |||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.RandomAccessFile; | import java.io.RandomAccessFile; | ||||
import java.io.UnsupportedEncodingException; | import java.io.UnsupportedEncodingException; | ||||
import java.nio.charset.CharacterCodingException; | |||||
import java.util.Calendar; | import java.util.Calendar; | ||||
import java.util.Collections; | import java.util.Collections; | ||||
import java.util.Date; | import java.util.Date; | ||||
import java.util.Enumeration; | import java.util.Enumeration; | ||||
import java.util.HashMap; | import java.util.HashMap; | ||||
import java.util.Map; | import java.util.Map; | ||||
import java.util.zip.CRC32; | |||||
import java.util.zip.Inflater; | import java.util.zip.Inflater; | ||||
import java.util.zip.InflaterInputStream; | import java.util.zip.InflaterInputStream; | ||||
import java.util.zip.ZipException; | import java.util.zip.ZipException; | ||||
@@ -101,6 +103,11 @@ public class ZipFile { | |||||
*/ | */ | ||||
private RandomAccessFile archive; | private RandomAccessFile archive; | ||||
/** | |||||
* Whether to look for and use Unicode extra fields. | |||||
*/ | |||||
private final boolean useUnicodeExtraFields; | |||||
/** | /** | ||||
* Opens the given file for reading, assuming the platform's | * Opens the given file for reading, assuming the platform's | ||||
* native encoding for file names. | * native encoding for file names. | ||||
@@ -127,7 +134,7 @@ public class ZipFile { | |||||
/** | /** | ||||
* Opens the given file for reading, assuming the specified | * Opens the given file for reading, assuming the specified | ||||
* encoding for file names. | |||||
* encoding for file names and ignoring unicode extra fields. | |||||
* | * | ||||
* @param name name of the archive. | * @param name name of the archive. | ||||
* @param encoding the encoding to use for file names | * @param encoding the encoding to use for file names | ||||
@@ -135,7 +142,21 @@ public class ZipFile { | |||||
* @throws IOException if an error occurs while reading the file. | * @throws IOException if an error occurs while reading the file. | ||||
*/ | */ | ||||
public ZipFile(String name, String encoding) throws IOException { | public ZipFile(String name, String encoding) throws IOException { | ||||
this(new File(name), encoding); | |||||
this(new File(name), encoding, false); | |||||
} | |||||
/** | |||||
* Opens the given file for reading, assuming the specified | |||||
* encoding for file names and ignoring unicode extra fields. | |||||
* | |||||
* @param f the archive. | |||||
* @param encoding the encoding to use for file names, use null | |||||
* for the platform's default encoding | |||||
* | |||||
* @throws IOException if an error occurs while reading the file. | |||||
*/ | |||||
public ZipFile(File f, String encoding) throws IOException { | |||||
this(f, encoding, false); | |||||
} | } | ||||
/** | /** | ||||
@@ -144,16 +165,20 @@ public class ZipFile { | |||||
* | * | ||||
* @param f the archive. | * @param f the archive. | ||||
* @param encoding the encoding to use for file names | * @param encoding the encoding to use for file names | ||||
* @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra | |||||
* Fields (if present) to set the file names. | |||||
* | * | ||||
* @throws IOException if an error occurs while reading the file. | * @throws IOException if an error occurs while reading the file. | ||||
*/ | */ | ||||
public ZipFile(File f, String encoding) throws IOException { | |||||
public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) | |||||
throws IOException { | |||||
this.encoding = encoding; | this.encoding = encoding; | ||||
this.useUnicodeExtraFields = useUnicodeExtraFields; | |||||
archive = new RandomAccessFile(f, "r"); | archive = new RandomAccessFile(f, "r"); | ||||
boolean success = false; | boolean success = false; | ||||
try { | try { | ||||
populateFromCentralDirectory(); | |||||
resolveLocalFileHeaderData(); | |||||
Map entriesWithoutEFS = populateFromCentralDirectory(); | |||||
resolveLocalFileHeaderData(entriesWithoutEFS); | |||||
success = true; | success = true; | ||||
} finally { | } finally { | ||||
if (!success) { | if (!success) { | ||||
@@ -270,9 +295,15 @@ public class ZipFile { | |||||
* <p>The ZipEntrys will know all data that can be obtained from | * <p>The ZipEntrys will know all data that can be obtained from | ||||
* the central directory alone, but not the data that requires the | * the central directory alone, but not the data that requires the | ||||
* local file header or additional data to be read.</p> | * local file header or additional data to be read.</p> | ||||
* | |||||
* @return a {@code Map<ZipEntry, NameAndComment>} of the zip | |||||
* entries that didn't have the language encoding flag set when | |||||
* read. | |||||
*/ | */ | ||||
private void populateFromCentralDirectory() | |||||
private Map populateFromCentralDirectory() | |||||
throws IOException { | throws IOException { | ||||
HashMap noEFS = new HashMap(); | |||||
positionAtCentralDirectory(); | positionAtCentralDirectory(); | ||||
byte[] cfh = new byte[CFH_LEN]; | byte[] cfh = new byte[CFH_LEN]; | ||||
@@ -297,10 +328,10 @@ public class ZipFile { | |||||
off += SHORT; // skip version info | off += SHORT; // skip version info | ||||
final int generalPurposeFlag = ZipShort.getValue(cfh, off); | final int generalPurposeFlag = ZipShort.getValue(cfh, off); | ||||
final String entryEncoding = | |||||
(generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0 | |||||
? ZipOutputStream.UTF8 | |||||
: encoding; | |||||
final boolean hasEFS = | |||||
(generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0; | |||||
final String entryEncoding = | |||||
hasEFS ? ZipOutputStream.UTF8 : encoding; | |||||
off += SHORT; | off += SHORT; | ||||
@@ -368,7 +399,12 @@ public class ZipFile { | |||||
archive.readFully(signatureBytes); | archive.readFully(signatureBytes); | ||||
sig = ZipLong.getValue(signatureBytes); | sig = ZipLong.getValue(signatureBytes); | ||||
if (!hasEFS && useUnicodeExtraFields) { | |||||
noEFS.put(ze, new NameAndComment(fileName, comment)); | |||||
} | |||||
} | } | ||||
return noEFS; | |||||
} | } | ||||
private static final int MIN_EOCD_SIZE = | private static final int MIN_EOCD_SIZE = | ||||
@@ -463,7 +499,7 @@ public class ZipFile { | |||||
* <p>Also records the offsets for the data to read from the | * <p>Also records the offsets for the data to read from the | ||||
* entries.</p> | * entries.</p> | ||||
*/ | */ | ||||
private void resolveLocalFileHeaderData() | |||||
private void resolveLocalFileHeaderData(Map entriesWithoutEFS) | |||||
throws IOException { | throws IOException { | ||||
Enumeration e = getEntries(); | Enumeration e = getEntries(); | ||||
while (e.hasMoreElements()) { | while (e.hasMoreElements()) { | ||||
@@ -494,6 +530,12 @@ public class ZipFile { | |||||
*/ | */ | ||||
offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH | offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH | ||||
+ SHORT + SHORT + fileNameLen + extraFieldLen; | + SHORT + SHORT + fileNameLen + extraFieldLen; | ||||
if (entriesWithoutEFS.containsKey(ze)) { | |||||
setNameAndCommentFromExtraFields(ze, | |||||
(NameAndComment) | |||||
entriesWithoutEFS.get(ze)); | |||||
} | |||||
} | } | ||||
} | } | ||||
@@ -551,7 +593,11 @@ public class ZipFile { | |||||
return new String(bytes); | return new String(bytes); | ||||
} else { | } else { | ||||
try { | try { | ||||
return ZipEncodingHelper.decodeName(bytes, enc); | |||||
try { | |||||
return ZipEncodingHelper.decodeName(bytes, enc); | |||||
} catch (CharacterCodingException ex) { | |||||
throw new ZipException(ex.getMessage()); | |||||
} | |||||
} catch (java.nio.charset.UnsupportedCharsetException ex) { | } catch (java.nio.charset.UnsupportedCharsetException ex) { | ||||
// Java 1.4's NIO doesn't recognize a few names that | // Java 1.4's NIO doesn't recognize a few names that | ||||
// String.getBytes does | // String.getBytes does | ||||
@@ -580,6 +626,64 @@ public class ZipFile { | |||||
return true; | return true; | ||||
} | } | ||||
/** | |||||
* If the entry has Unicode*ExtraFields and the CRCs of the | |||||
* names/comments match those of the extra fields, transfer the | |||||
* known Unicode values from the extra field. | |||||
*/ | |||||
private void setNameAndCommentFromExtraFields(ZipEntry ze, | |||||
NameAndComment nc) { | |||||
UnicodePathExtraField name = (UnicodePathExtraField) | |||||
ze.getExtraField(UnicodePathExtraField.UPATH_ID); | |||||
String originalName = ze.getName(); | |||||
String newName = getUnicodeStringIfOriginalMatches(name, nc.name); | |||||
if (newName != null && !originalName.equals(newName)) { | |||||
ze.setName(newName); | |||||
nameMap.remove(originalName); | |||||
nameMap.put(newName, ze); | |||||
} | |||||
if (nc.comment != null && nc.comment.length > 0) { | |||||
UnicodeCommentExtraField cmt = (UnicodeCommentExtraField) | |||||
ze.getExtraField(UnicodeCommentExtraField.UCOM_ID); | |||||
String newComment = | |||||
getUnicodeStringIfOriginalMatches(cmt, nc.comment); | |||||
if (newComment != null) { | |||||
ze.setComment(newComment); | |||||
} | |||||
} | |||||
} | |||||
/** | |||||
* If the stored CRC matches the one of the given name, return the | |||||
* Unicode name of the given field. | |||||
* | |||||
* <p>If the field is null or the CRCs don't match, return null | |||||
* instead.</p> | |||||
*/ | |||||
private String getUnicodeStringIfOriginalMatches(AbstractUnicodeExtraField f, | |||||
byte[] orig) { | |||||
if (f != null) { | |||||
CRC32 crc32 = new CRC32(); | |||||
crc32.update(orig); | |||||
long origCRC32 = crc32.getValue(); | |||||
if (origCRC32 == f.getNameCRC32()) { | |||||
try { | |||||
return ZipEncodingHelper | |||||
.decodeName(f.getUnicodeName(), ZipOutputStream.UTF8); | |||||
} catch (CharacterCodingException ex) { | |||||
// UTF-8 itself is always supported, so the | |||||
// Unicode*ExtraField must contain malformed bytes. | |||||
// TODO log this anywhere? | |||||
return null; | |||||
} | |||||
} | |||||
} | |||||
return null; | |||||
} | |||||
/** | /** | ||||
* InputStream that delegates requests to the underlying | * InputStream that delegates requests to the underlying | ||||
* RandomAccessFile, making sure that only bytes from a certain | * RandomAccessFile, making sure that only bytes from a certain | ||||
@@ -647,4 +751,12 @@ public class ZipFile { | |||||
} | } | ||||
} | } | ||||
private static final class NameAndComment { | |||||
private final byte[] name; | |||||
private final byte[] comment; | |||||
private NameAndComment(byte[] name, byte[] comment) { | |||||
this.name = name; | |||||
this.comment = comment; | |||||
} | |||||
} | |||||
} | } |
@@ -260,6 +260,11 @@ public class ZipOutputStream extends FilterOutputStream { | |||||
*/ | */ | ||||
private boolean useEFS = true; | private boolean useEFS = true; | ||||
/** | |||||
* whether to create Unicode path/comment extra fields for each entry. | |||||
*/ | |||||
private boolean createUnicodeExtraFields = false; | |||||
/** | /** | ||||
* Creates a new ZIP OutputStream filtering the underlying stream. | * Creates a new ZIP OutputStream filtering the underlying stream. | ||||
* @param out the outputstream to zip | * @param out the outputstream to zip | ||||
@@ -335,14 +340,24 @@ public class ZipOutputStream extends FilterOutputStream { | |||||
} | } | ||||
/** | /** | ||||
* Whether to set the EFS flag if the file name encoding is UTF-8. | |||||
* Whether to set the language encoding flag if the file name | |||||
* encoding is UTF-8. | |||||
* | * | ||||
* <p>Defaults to true.</p> | * <p>Defaults to true.</p> | ||||
*/ | */ | ||||
public void setUseEFS(boolean b) { | |||||
public void setUseLanguageEncodingFlag(boolean b) { | |||||
useEFS = b && isUTF8(encoding); | useEFS = b && isUTF8(encoding); | ||||
} | } | ||||
/** | |||||
* Whether to create Unicode Extra Fields for all entries. | |||||
* | |||||
* <p>Defaults to false.</p> | |||||
*/ | |||||
public void setCreateUnicodeExtraFields(boolean b) { | |||||
createUnicodeExtraFields = b; | |||||
} | |||||
/** | /** | ||||
* Finishes writing the contents and closes this as well as the | * Finishes writing the contents and closes this as well as the | ||||
* underlying stream. | * underlying stream. | ||||
@@ -638,6 +653,17 @@ public class ZipOutputStream extends FilterOutputStream { | |||||
* @since 1.1 | * @since 1.1 | ||||
*/ | */ | ||||
protected void writeLocalFileHeader(ZipEntry ze) throws IOException { | protected void writeLocalFileHeader(ZipEntry ze) throws IOException { | ||||
byte[] name = getBytes(ze.getName()); | |||||
if (createUnicodeExtraFields) { | |||||
ze.addExtraField(new UnicodePathExtraField(ze.getName(), name)); | |||||
String comm = ze.getComment(); | |||||
if (comm != null && !"".equals(comm)) { | |||||
byte[] commentB = getBytes(comm); | |||||
ze.addExtraField(new UnicodeCommentExtraField(comm, commentB)); | |||||
} | |||||
} | |||||
offsets.put(ze, ZipLong.getBytes(written)); | offsets.put(ze, ZipLong.getBytes(written)); | ||||
writeOut(LFH_SIG); | writeOut(LFH_SIG); | ||||
@@ -675,7 +701,6 @@ public class ZipOutputStream extends FilterOutputStream { | |||||
// CheckStyle:MagicNumber ON | // CheckStyle:MagicNumber ON | ||||
// file name length | // file name length | ||||
byte[] name = getBytes(ze.getName()); | |||||
writeOut(ZipShort.getBytes(name.length)); | writeOut(ZipShort.getBytes(name.length)); | ||||
written += SHORT; | written += SHORT; | ||||