git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748147 13f79535-47bb-0310-9956-ffa450edef68master
| @@ -34,23 +34,32 @@ public abstract class AbstractUnicodeExtraField implements ZipExtraField { | |||
| } | |||
| /** | |||
| * Assemble as unicode path extension form the name and encoding | |||
| * of the orginal zip entry. | |||
| * Assemble as unicode extension from the name/comment and | |||
| * encoding of the original zip entry. | |||
| * | |||
| * @param name The file name or comment. | |||
| * @param text The file name or comment. | |||
| * @param zipEncoding The encoding of the filenames in the zip | |||
| * file, usually <code>"CP437"</code>. | |||
| */ | |||
| protected AbstractUnicodeExtraField(String name, String zipEncoding) { | |||
| byte[] filename = ZipEncodingHelper.encodeName(name, zipEncoding); | |||
| protected AbstractUnicodeExtraField(String text, String zipEncoding) { | |||
| this(text, ZipEncodingHelper.encodeName(text, zipEncoding)); | |||
| } | |||
| /** | |||
| * Assemble as unicode extension from the name/comment given as | |||
| * text as well as the bytes it was encoded to for the zip entry. | |||
| * | |||
| * @param text The file name or comment. | |||
| * @param bytes The bytes of the name/comment as written to the | |||
| * archive; their CRC32 is stored next to the UTF-8 form of the text. | |||
| */ | |||
| protected AbstractUnicodeExtraField(String text, byte[] bytes) { | |||
| CRC32 crc32 = new CRC32(); | |||
| crc32.update(filename); | |||
| crc32.update(bytes); | |||
| nameCRC32 = crc32.getValue(); | |||
| try { | |||
| unicodeName = name.getBytes("UTF-8"); | |||
| unicodeName = text.getBytes("UTF-8"); | |||
| } catch (UnsupportedEncodingException e) { | |||
| throw new RuntimeException("FATAL: UTF-8 encoding not supported.", | |||
| e); | |||
| @@ -42,15 +42,26 @@ public class UnicodeCommentExtraField extends AbstractUnicodeExtraField { | |||
| } | |||
| /** | |||
| * Assemble as unicode comment extension form the comment and | |||
| * Assemble as unicode comment extension from the comment and | |||
| * encoding of the original zip entry. | |||
| * | |||
| * @param name The file name | |||
| * @param comment The file comment | |||
| * @param zipEncoding The encoding of the comment in the zip file, | |||
| * usually <code>"CP437"</code>. | |||
| */ | |||
| public UnicodeCommentExtraField(String name, String zipEncoding) { | |||
| super(name, zipEncoding); | |||
| public UnicodeCommentExtraField(String comment, String zipEncoding) { | |||
| super(comment, zipEncoding); | |||
| } | |||
| /** | |||
| * Assemble as unicode comment extension from the comment given as | |||
| * text as well as the bytes actually written to the archive. | |||
| * | |||
| * @param comment The file comment | |||
| * @param bytes the bytes actually written to the archive | |||
| */ | |||
| public UnicodeCommentExtraField(String comment, byte[] bytes) { | |||
| super(comment, bytes); | |||
| } | |||
| public ZipShort getHeaderId() { | |||
| @@ -42,7 +42,7 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField { | |||
| } | |||
| /** | |||
| * Assemble as unicode path extension form the name and encoding | |||
| * Assemble as unicode path extension from the name and encoding | |||
| * of the original zip entry. | |||
| * | |||
| * @param name The file name | |||
| @@ -53,6 +53,17 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField { | |||
| super(name, zipEncoding); | |||
| } | |||
| /** | |||
| * Assemble as unicode path extension from the name given as | |||
| * text as well as the bytes actually written to the archive. | |||
| * | |||
| * @param name The file name | |||
| * @param bytes the bytes actually written to the archive | |||
| */ | |||
| public UnicodePathExtraField(String name, byte[] bytes) { | |||
| super(name, bytes); | |||
| } | |||
| public ZipShort getHeaderId() { | |||
| return UPATH_ID; | |||
| } | |||
| @@ -75,17 +75,23 @@ abstract class ZipEncodingHelper { | |||
| * </pre> | |||
| * | |||
| * @param name The filename or comment with possible non-ASCII | |||
| * unicode characters. | |||
| * unicode characters. Must not be null. | |||
| * @param encoding A valid encoding name. The standard zip | |||
| * encoding is <code>"CP437"</code>, | |||
| * <code>"UTF-8"</code> is supported in ZIP file | |||
| * version <code>6.3</code> or later. | |||
| * version <code>6.3</code> or later. If null, | |||
| * will use the platform's {@link | |||
| * java.lang.String#getBytes default encoding}. | |||
| * @return A byte array containing the mapped file | |||
| * name. Unmappable characters or malformed character | |||
| * sequences are mapped to a sequence of utf-16 words | |||
| * encoded in the format <code>%Uxxxx</code>. | |||
| */ | |||
| static final byte[] encodeName(String name, String encoding) { | |||
| if (encoding == null) { | |||
| return name.getBytes(); | |||
| } | |||
| Charset cs = Charset.forName(encoding); | |||
| CharsetEncoder enc = cs.newEncoder(); | |||
| @@ -178,8 +184,12 @@ abstract class ZipEncodingHelper { | |||
| * <code>"UTF-8"</code> is supported in ZIP file | |||
| * version <code>6.3</code> or later. | |||
| */ | |||
| static final String decodeName(byte[] name, String encoding) { | |||
| static final String decodeName(byte[] name, String encoding) | |||
| throws java.nio.charset.CharacterCodingException { | |||
| Charset cs = Charset.forName(encoding); | |||
| return cs.decode(ByteBuffer.wrap(name)).toString(); | |||
| return cs.newDecoder() | |||
| .onMalformedInput(CodingErrorAction.REPORT) | |||
| .onUnmappableCharacter(CodingErrorAction.REPORT) | |||
| .decode(ByteBuffer.wrap(name)).toString(); | |||
| } | |||
| } | |||
| @@ -263,6 +263,18 @@ public class ZipEntry extends java.util.zip.ZipEntry implements Cloneable { | |||
| setExtra(); | |||
| } | |||
| /** | |||
| * Looks up an extra field by its header id. | |||
| * | |||
| * @return null if no such field exists. | |||
| */ | |||
| public ZipExtraField getExtraField(ZipShort type) { | |||
| if (extraFields != null) { | |||
| return (ZipExtraField) extraFields.get(type); | |||
| } | |||
| return null; | |||
| } | |||
| /** | |||
| * Throws an Exception if extra data cannot be parsed into extra fields. | |||
| * @param extra an array of bytes to be parsed into extra fields | |||
| @@ -23,12 +23,14 @@ import java.io.IOException; | |||
| import java.io.InputStream; | |||
| import java.io.RandomAccessFile; | |||
| import java.io.UnsupportedEncodingException; | |||
| import java.nio.charset.CharacterCodingException; | |||
| import java.util.Calendar; | |||
| import java.util.Collections; | |||
| import java.util.Date; | |||
| import java.util.Enumeration; | |||
| import java.util.HashMap; | |||
| import java.util.Map; | |||
| import java.util.zip.CRC32; | |||
| import java.util.zip.Inflater; | |||
| import java.util.zip.InflaterInputStream; | |||
| import java.util.zip.ZipException; | |||
| @@ -101,6 +103,11 @@ public class ZipFile { | |||
| */ | |||
| private RandomAccessFile archive; | |||
| /** | |||
| * Whether to look for and use Unicode extra fields. | |||
| */ | |||
| private final boolean useUnicodeExtraFields; | |||
| /** | |||
| * Opens the given file for reading, assuming the platform's | |||
| * native encoding for file names. | |||
| @@ -127,7 +134,7 @@ public class ZipFile { | |||
| /** | |||
| * Opens the given file for reading, assuming the specified | |||
| * encoding for file names. | |||
| * encoding for file names and ignoring unicode extra fields. | |||
| * | |||
| * @param name name of the archive. | |||
| * @param encoding the encoding to use for file names | |||
| @@ -135,7 +142,21 @@ public class ZipFile { | |||
| * @throws IOException if an error occurs while reading the file. | |||
| */ | |||
| public ZipFile(String name, String encoding) throws IOException { | |||
| this(new File(name), encoding); | |||
| this(new File(name), encoding, false); | |||
| } | |||
| /** | |||
| * Opens the given file for reading, assuming the specified | |||
| * encoding for file names and ignoring unicode extra fields. | |||
| * | |||
| * @param f the archive. | |||
| * @param encoding the encoding to use for file names, use null | |||
| * for the platform's default encoding | |||
| * | |||
| * @throws IOException if an error occurs while reading the file. | |||
| */ | |||
| public ZipFile(File f, String encoding) throws IOException { | |||
| this(f, encoding, false); | |||
| } | |||
| /** | |||
| @@ -144,16 +165,20 @@ public class ZipFile { | |||
| * | |||
| * @param f the archive. | |||
| * @param encoding the encoding to use for file names | |||
| * @param useUnicodeExtraFields whether to use InfoZIP Unicode | |||
| * Extra Fields (if present) to set the file names. | |||
| * | |||
| * @throws IOException if an error occurs while reading the file. | |||
| */ | |||
| public ZipFile(File f, String encoding) throws IOException { | |||
| public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) | |||
| throws IOException { | |||
| this.encoding = encoding; | |||
| this.useUnicodeExtraFields = useUnicodeExtraFields; | |||
| archive = new RandomAccessFile(f, "r"); | |||
| boolean success = false; | |||
| try { | |||
| populateFromCentralDirectory(); | |||
| resolveLocalFileHeaderData(); | |||
| Map entriesWithoutEFS = populateFromCentralDirectory(); | |||
| resolveLocalFileHeaderData(entriesWithoutEFS); | |||
| success = true; | |||
| } finally { | |||
| if (!success) { | |||
| @@ -270,9 +295,15 @@ public class ZipFile { | |||
| * <p>The ZipEntrys will know all data that can be obtained from | |||
| * the central directory alone, but not the data that requires the | |||
| * local file header or additional data to be read.</p> | |||
| * | |||
| * @return a Map<ZipEntry, NameAndComment> of | |||
| * zip entries that didn't have the language encoding flag set when | |||
| * read; empty unless Unicode extra fields are to be used. | |||
| */ | |||
| private void populateFromCentralDirectory() | |||
| private Map populateFromCentralDirectory() | |||
| throws IOException { | |||
| HashMap noEFS = new HashMap(); | |||
| positionAtCentralDirectory(); | |||
| byte[] cfh = new byte[CFH_LEN]; | |||
| @@ -297,10 +328,10 @@ public class ZipFile { | |||
| off += SHORT; // skip version info | |||
| final int generalPurposeFlag = ZipShort.getValue(cfh, off); | |||
| final String entryEncoding = | |||
| (generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0 | |||
| ? ZipOutputStream.UTF8 | |||
| : encoding; | |||
| final boolean hasEFS = | |||
| (generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0; | |||
| final String entryEncoding = | |||
| hasEFS ? ZipOutputStream.UTF8 : encoding; | |||
| off += SHORT; | |||
| @@ -368,7 +399,12 @@ public class ZipFile { | |||
| archive.readFully(signatureBytes); | |||
| sig = ZipLong.getValue(signatureBytes); | |||
| if (!hasEFS && useUnicodeExtraFields) { | |||
| noEFS.put(ze, new NameAndComment(fileName, comment)); | |||
| } | |||
| } | |||
| return noEFS; | |||
| } | |||
| private static final int MIN_EOCD_SIZE = | |||
| @@ -463,7 +499,7 @@ public class ZipFile { | |||
| * <p>Also records the offsets for the data to read from the | |||
| * entries.</p> | |||
| */ | |||
| private void resolveLocalFileHeaderData() | |||
| private void resolveLocalFileHeaderData(Map entriesWithoutEFS) | |||
| throws IOException { | |||
| Enumeration e = getEntries(); | |||
| while (e.hasMoreElements()) { | |||
| @@ -494,6 +530,12 @@ public class ZipFile { | |||
| */ | |||
| offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH | |||
| + SHORT + SHORT + fileNameLen + extraFieldLen; | |||
| if (entriesWithoutEFS.containsKey(ze)) { | |||
| setNameAndCommentFromExtraFields(ze, | |||
| (NameAndComment) | |||
| entriesWithoutEFS.get(ze)); | |||
| } | |||
| } | |||
| } | |||
| @@ -551,7 +593,11 @@ public class ZipFile { | |||
| return new String(bytes); | |||
| } else { | |||
| try { | |||
| return ZipEncodingHelper.decodeName(bytes, enc); | |||
| try { | |||
| return ZipEncodingHelper.decodeName(bytes, enc); | |||
| } catch (CharacterCodingException ex) { | |||
| throw new ZipException(ex.getMessage()); | |||
| } | |||
| } catch (java.nio.charset.UnsupportedCharsetException ex) { | |||
| // Java 1.4's NIO doesn't recognize a few names that | |||
| // String.getBytes does | |||
| @@ -580,6 +626,64 @@ public class ZipFile { | |||
| return true; | |||
| } | |||
| /** | |||
| * If the entry has Unicode*ExtraFields and the CRCs of the | |||
| * names/comments match those of the extra fields, transfer the | |||
| * known Unicode values from the extra field. | |||
| */ | |||
| private void setNameAndCommentFromExtraFields(ZipEntry ze, | |||
| NameAndComment nc) { | |||
| UnicodePathExtraField name = (UnicodePathExtraField) | |||
| ze.getExtraField(UnicodePathExtraField.UPATH_ID); | |||
| String originalName = ze.getName(); | |||
| String newName = getUnicodeStringIfOriginalMatches(name, nc.name); | |||
| if (newName != null && !originalName.equals(newName)) { | |||
| ze.setName(newName); | |||
| nameMap.remove(originalName); | |||
| nameMap.put(newName, ze); | |||
| } | |||
| if (nc.comment != null && nc.comment.length > 0) { | |||
| UnicodeCommentExtraField cmt = (UnicodeCommentExtraField) | |||
| ze.getExtraField(UnicodeCommentExtraField.UCOM_ID); | |||
| String newComment = | |||
| getUnicodeStringIfOriginalMatches(cmt, nc.comment); | |||
| if (newComment != null) { | |||
| ze.setComment(newComment); | |||
| } | |||
| } | |||
| } | |||
| /** | |||
| * If the stored CRC matches the one of the given name, return the | |||
| * Unicode name of the given field. | |||
| * | |||
| * <p>If the field is null or the CRCs don't match, return null | |||
| * instead.</p> | |||
| */ | |||
| private String getUnicodeStringIfOriginalMatches(AbstractUnicodeExtraField f, | |||
| byte[] orig) { | |||
| if (f != null) { | |||
| CRC32 crc32 = new CRC32(); | |||
| crc32.update(orig); | |||
| long origCRC32 = crc32.getValue(); | |||
| if (origCRC32 == f.getNameCRC32()) { | |||
| try { | |||
| return ZipEncodingHelper | |||
| .decodeName(f.getUnicodeName(), ZipOutputStream.UTF8); | |||
| } catch (CharacterCodingException ex) { | |||
| // UTF-8 unsupported? should be impossible, so the | |||
| // Unicode*ExtraField must contain some bad bytes | |||
| // TODO log this anywhere? | |||
| return null; | |||
| } | |||
| } | |||
| } | |||
| return null; | |||
| } | |||
| /** | |||
| * InputStream that delegates requests to the underlying | |||
| * RandomAccessFile, making sure that only bytes from a certain | |||
| @@ -647,4 +751,12 @@ public class ZipFile { | |||
| } | |||
| } | |||
| private static final class NameAndComment { | |||
| private final byte[] name; | |||
| private final byte[] comment; | |||
| private NameAndComment(byte[] name, byte[] comment) { | |||
| this.name = name; | |||
| this.comment = comment; | |||
| } | |||
| } | |||
| } | |||
| @@ -260,6 +260,11 @@ public class ZipOutputStream extends FilterOutputStream { | |||
| */ | |||
| private boolean useEFS = true; | |||
| /** | |||
| * whether to create Unicode path and comment extra fields for each entry. | |||
| */ | |||
| private boolean createUnicodeExtraFields = false; | |||
| /** | |||
| * Creates a new ZIP OutputStream filtering the underlying stream. | |||
| * @param out the outputstream to zip | |||
| @@ -335,14 +340,24 @@ public class ZipOutputStream extends FilterOutputStream { | |||
| } | |||
| /** | |||
| * Whether to set the EFS flag if the file name encoding is UTF-8. | |||
| * Whether to set the language encoding flag if the file name | |||
| * encoding is UTF-8. | |||
| * | |||
| * <p>Defaults to true.</p> | |||
| */ | |||
| public void setUseEFS(boolean b) { | |||
| public void setUseLanguageEncodingFlag(boolean b) { | |||
| useEFS = b && isUTF8(encoding); | |||
| } | |||
| /** | |||
| * Whether to create Unicode Extra Fields for all entries. | |||
| * | |||
| * <p>Defaults to false.</p> | |||
| */ | |||
| public void setCreateUnicodeExtraFields(boolean b) { | |||
| createUnicodeExtraFields = b; | |||
| } | |||
| /** | |||
| * Finishes writing the contents and closes this as well as the | |||
| * underlying stream. | |||
| @@ -638,6 +653,17 @@ public class ZipOutputStream extends FilterOutputStream { | |||
| * @since 1.1 | |||
| */ | |||
| protected void writeLocalFileHeader(ZipEntry ze) throws IOException { | |||
| byte[] name = getBytes(ze.getName()); | |||
| if (createUnicodeExtraFields) { | |||
| ze.addExtraField(new UnicodePathExtraField(ze.getName(), name)); | |||
| String comm = ze.getComment(); | |||
| if (comm != null && !"".equals(comm)) { | |||
| byte[] commentB = getBytes(comm); | |||
| ze.addExtraField(new UnicodeCommentExtraField(comm, commentB)); | |||
| } | |||
| } | |||
| offsets.put(ze, ZipLong.getBytes(written)); | |||
| writeOut(LFH_SIG); | |||
| @@ -675,7 +701,6 @@ public class ZipOutputStream extends FilterOutputStream { | |||
| // CheckStyle:MagicNumber ON | |||
| // file name length | |||
| byte[] name = getBytes(ze.getName()); | |||
| writeOut(ZipShort.getBytes(name.length)); | |||
| written += SHORT; | |||