optionally read/write InfoZIP unicode fields, merge from commons-compress

git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@748147 13f79535-47bb-0310-9956-ffa450edef68
16 years ago · 8a183a492d
--- a/src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java
+++ b/src/main/org/apache/tools/zip/AbstractUnicodeExtraField.java
@@ -34,23 +34,32 @@ public abstract class AbstractUnicodeExtraField implements ZipExtraField {
    }

    /**
     * Assemble as unicode path extension form the name and encoding
     * of the orginal zip entry.
     * Assemble as unicode extension from the name/comment and
     * encoding of the original zip entry.
     * 
     * @param name The file name or comment.
     * @param text The file name or comment.
     * @param zipEncoding The encoding of the filenames in the zip
     * file, usually <code>"CP437"</code>.
     */
    protected AbstractUnicodeExtraField(String name, String zipEncoding) {

        byte[] filename = ZipEncodingHelper.encodeName(name, zipEncoding);
    protected AbstractUnicodeExtraField(String text, String zipEncoding) {
        this(text, ZipEncodingHelper.encodeName(text, zipEncoding));
    }

    /**
     * Assemble as unicode extension from the name/comment given as
     * text as well as the encoded bytes of the original zip entry.
     * 
     * @param text The file name or comment.
     * @param bytes The encoded form of the name or comment; the
     * CRC32 stored in this field is computed over these bytes.
     */
    protected AbstractUnicodeExtraField(String text, byte[] bytes) {
        CRC32 crc32 = new CRC32();
        crc32.update(filename);
        crc32.update(bytes);
        nameCRC32 = crc32.getValue();

        try {
            unicodeName = name.getBytes("UTF-8");
            unicodeName = text.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("FATAL: UTF-8 encoding not supported.",
                                       e);
--- a/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java
+++ b/src/main/org/apache/tools/zip/UnicodeCommentExtraField.java
@@ -42,15 +42,26 @@ public class UnicodeCommentExtraField extends AbstractUnicodeExtraField {
    }

    /**
     * Assemble as unicode comment extension form the comment and
     * Assemble as unicode comment extension from the comment and
     * encoding of the original zip entry.
     * 
     * @param name The file name
     * @param comment The file comment
     * @param zipEncoding The encoding of the comment in the zip file,
     * usually <code>"CP437"</code>.
     */
    public UnicodeCommentExtraField(String name, String zipEncoding) {
        super(name, zipEncoding);
    public UnicodeCommentExtraField(String comment, String zipEncoding) {
        super(comment, zipEncoding);
    }

    /**
     * Assemble as unicode comment extension from the comment given as
     * text as well as the bytes actually written to the archive.
     *
     * <p>Both arguments are passed unchanged to the superclass
     * constructor.</p>
     *
     * @param comment The file comment
     * @param bytes the bytes actually written to the archive
     */
    public UnicodeCommentExtraField(String comment, byte[] bytes) {
        super(comment, bytes);
    }

    public ZipShort getHeaderId() {
--- a/src/main/org/apache/tools/zip/UnicodePathExtraField.java
+++ b/src/main/org/apache/tools/zip/UnicodePathExtraField.java
@@ -42,7 +42,7 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField {
    }

    /**
     * Assemble as unicode path extension form the name and encoding
     * Assemble as unicode path extension from the name and encoding
     * of the original zip entry.
     * 
     * @param name The file name
@@ -53,6 +53,17 @@ public class UnicodePathExtraField extends AbstractUnicodeExtraField {
        super(name, zipEncoding);
    }

    /**
     * Assemble as unicode path extension from the name given as
     * text as well as the bytes actually written to the archive.
     *
     * <p>Both arguments are passed unchanged to the superclass
     * constructor.</p>
     *
     * @param name The file name
     * @param bytes the bytes actually written to the archive
     */
    public UnicodePathExtraField(String name, byte[] bytes) {
        super(name, bytes);
    }

    public ZipShort getHeaderId() {
        return UPATH_ID;
    }
--- a/src/main/org/apache/tools/zip/ZipEncodingHelper.java
+++ b/src/main/org/apache/tools/zip/ZipEncodingHelper.java
@@ -75,17 +75,23 @@ abstract class ZipEncodingHelper {
     * </pre>
     * 
     * @param name The filename or comment with possible non-ASCII
     * unicode characters.
     * unicode characters.  Must not be null.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     *                 version <code>6.3</code> or later.  If null,
     *                 will use the platform's {@link
     *                 java.lang.String#getBytes default encoding}.
     * @return A byte array containing the mapped file
     *         name. Unmappable characters or malformed character
     *         sequences are mapped to a sequence of utf-16 words
     *         encoded in the format <code>%Uxxxx</code>.
     */
    static final byte[] encodeName(String name, String encoding) {
        if (encoding == null) {
            return name.getBytes();
        }

        Charset cs = Charset.forName(encoding);
        CharsetEncoder enc = cs.newEncoder();

@@ -178,8 +184,12 @@ abstract class ZipEncodingHelper {
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     */
    static final String decodeName(byte[] name, String encoding) {
    static final String decodeName(byte[] name, String encoding)
        throws java.nio.charset.CharacterCodingException {
        Charset cs = Charset.forName(encoding);
        return cs.decode(ByteBuffer.wrap(name)).toString();
        return cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT)
            .decode(ByteBuffer.wrap(name)).toString();
    }
 }
--- a/src/main/org/apache/tools/zip/ZipEntry.java
+++ b/src/main/org/apache/tools/zip/ZipEntry.java
@@ -263,6 +263,18 @@ public class ZipEntry extends java.util.zip.ZipEntry implements Cloneable {
        setExtra();
    }

    /**
     * Returns the extra field registered under the given header id.
     *
     * @param type the header id of the extra field to look up
     * @return the matching extra field, or null if this entry has no
     * extra fields at all or none with that header id.
     */
    public ZipExtraField getExtraField(ZipShort type) {
        if (extraFields == null) {
            return null;
        }
        return (ZipExtraField) extraFields.get(type);
    }

    /**
     * Throws an Exception if extra data cannot be parsed into extra fields.
     * @param extra an array of bytes to be parsed into extra fields
--- a/src/main/org/apache/tools/zip/ZipFile.java
+++ b/src/main/org/apache/tools/zip/ZipFile.java
@@ -23,12 +23,14 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.CharacterCodingException;
 import java.util.Calendar;
 import java.util.Collections;
 import java.util.Date;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.zip.CRC32;
 import java.util.zip.Inflater;
 import java.util.zip.InflaterInputStream;
 import java.util.zip.ZipException;
@@ -101,6 +103,11 @@ public class ZipFile {
     */
    private RandomAccessFile archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Opens the given file for reading, assuming the platform's
     * native encoding for file names.
@@ -127,7 +134,7 @@ public class ZipFile {

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names.
     * encoding for file names and ignoring unicode extra fields.
     *
     * @param name name of the archive.
     * @param encoding the encoding to use for file names
@@ -135,7 +142,21 @@ public class ZipFile {
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(String name, String encoding) throws IOException {
        this(new File(name), encoding);
        this(new File(name), encoding, false);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names and ignoring unicode extra fields.
     *
     * <p>Delegates to the three-argument constructor, passing
     * <code>false</code> as its last argument.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(File f, String encoding) throws IOException {
        this(f, encoding, false);
    }

    /**
@@ -144,16 +165,20 @@ public class ZipFile {
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(File f, String encoding) throws IOException {
    public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
        throws IOException {
        this.encoding = encoding;
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        archive = new RandomAccessFile(f, "r");
        boolean success = false;
        try {
            populateFromCentralDirectory();
            resolveLocalFileHeaderData();
            Map entriesWithoutEFS = populateFromCentralDirectory();
            resolveLocalFileHeaderData(entriesWithoutEFS);
            success = true;
        } finally {
            if (!success) {
@@ -270,9 +295,15 @@ public class ZipFile {
     * <p>The ZipEntrys will know all data that can be obtained from
     * the central directory alone, but not the data that requires the
     * local file header or additional data to be read.</p>
     *
     * @return a Map&lt;ZipEntry, NameAndComment&gt; of
     * zipentries that didn't have the language encoding flag set when
     * read.
     */
    private void populateFromCentralDirectory()
    private Map populateFromCentralDirectory()
        throws IOException {
        HashMap noEFS = new HashMap();

        positionAtCentralDirectory();

        byte[] cfh = new byte[CFH_LEN];
@@ -297,10 +328,10 @@ public class ZipFile {
            off += SHORT; // skip version info

            final int generalPurposeFlag = ZipShort.getValue(cfh, off);
            final String entryEncoding = 
                (generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0
                ? ZipOutputStream.UTF8
                : encoding;
            final boolean hasEFS = 
                (generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0;
            final String entryEncoding =
                hasEFS ? ZipOutputStream.UTF8 : encoding;

            off += SHORT;

@@ -368,7 +399,12 @@ public class ZipFile {

            archive.readFully(signatureBytes);
            sig = ZipLong.getValue(signatureBytes);

            if (!hasEFS && useUnicodeExtraFields) {
                noEFS.put(ze, new NameAndComment(fileName, comment));
            }
        }
        return noEFS;
    }

    private static final int MIN_EOCD_SIZE =
@@ -463,7 +499,7 @@ public class ZipFile {
     * <p>Also records the offsets for the data to read from the
     * entries.</p>
     */
    private void resolveLocalFileHeaderData()
    private void resolveLocalFileHeaderData(Map entriesWithoutEFS)
        throws IOException {
        Enumeration e = getEntries();
        while (e.hasMoreElements()) {
@@ -494,6 +530,12 @@ public class ZipFile {
            */
            offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
                                     + SHORT + SHORT + fileNameLen + extraFieldLen;

            if (entriesWithoutEFS.containsKey(ze)) {
                setNameAndCommentFromExtraFields(ze,
                                                 (NameAndComment)
                                                 entriesWithoutEFS.get(ze));
            }
        }
    }

@@ -551,7 +593,11 @@ public class ZipFile {
            return new String(bytes);
        } else {
            try {
                return ZipEncodingHelper.decodeName(bytes, enc);
                try {
                    return ZipEncodingHelper.decodeName(bytes, enc);
                } catch (CharacterCodingException ex) {
                    throw new ZipException(ex.getMessage());
                }
            } catch (java.nio.charset.UnsupportedCharsetException ex) {
                // Java 1.4's NIO doesn't recognize a few names that
                // String.getBytes does
@@ -580,6 +626,64 @@ public class ZipFile {
        return true;
    }

    /**
     * Replaces the name and comment of the given entry with the
     * values stored in its Unicode path/comment extra fields, as long
     * as those fields exist and their CRCs match the raw bytes.
     *
     * <p>When the name changes, the entry is re-registered in the
     * name lookup map under its new name.</p>
     *
     * @param ze the entry to update
     * @param nc the raw name and comment bytes recorded for the entry
     */
    private void setNameAndCommentFromExtraFields(ZipEntry ze,
                                                  NameAndComment nc) {
        UnicodePathExtraField pathField = (UnicodePathExtraField)
            ze.getExtraField(UnicodePathExtraField.UPATH_ID);
        String oldName = ze.getName();
        String unicodeName =
            getUnicodeStringIfOriginalMatches(pathField, nc.name);
        boolean renamed = unicodeName != null && !oldName.equals(unicodeName);
        if (renamed) {
            ze.setName(unicodeName);
            // keep the lookup map keyed by the entry's current name
            nameMap.remove(oldName);
            nameMap.put(unicodeName, ze);
        }

        byte[] rawComment = nc.comment;
        if (rawComment == null || rawComment.length == 0) {
            // no comment recorded, nothing to replace
            return;
        }
        UnicodeCommentExtraField commentField = (UnicodeCommentExtraField)
            ze.getExtraField(UnicodeCommentExtraField.UCOM_ID);
        String unicodeComment =
            getUnicodeStringIfOriginalMatches(commentField, rawComment);
        if (unicodeComment != null) {
            ze.setComment(unicodeComment);
        }
    }

    /**
     * Returns the Unicode text of the given extra field if the CRC
     * stored in the field equals the CRC32 of the given original
     * bytes.
     *
     * @param f the extra field to read from, may be null
     * @param orig the raw name or comment bytes to checksum
     * @return the decoded Unicode text, or null if the field is
     * null, the CRCs differ, or the field's bytes cannot be decoded
     * as UTF-8.
     */
    private String getUnicodeStringIfOriginalMatches(AbstractUnicodeExtraField f,
                                                     byte[] orig) {
        if (f == null) {
            return null;
        }

        CRC32 checksum = new CRC32();
        checksum.update(orig);
        if (checksum.getValue() != f.getNameCRC32()) {
            return null;
        }

        try {
            byte[] utf8Bytes = f.getUnicodeName();
            return ZipEncodingHelper.decodeName(utf8Bytes,
                                                ZipOutputStream.UTF8);
        } catch (CharacterCodingException ex) {
            // The bytes held by the Unicode*ExtraField could not be
            // decoded as UTF-8; treat the field as unusable.

            // TODO log this anywhere?
            return null;
        }
    }

    /**
     * InputStream that delegates requests to the underlying
     * RandomAccessFile, making sure that only bytes from a certain
@@ -647,4 +751,12 @@ public class ZipFile {
        }
    }

    /**
     * Simple holder pairing the raw bytes of an entry's name with
     * the raw bytes of its comment.
     */
    private static final class NameAndComment {
        /** Raw bytes of the entry name. */
        private final byte[] name;
        /** Raw bytes of the entry comment. */
        private final byte[] comment;

        private NameAndComment(byte[] nameBytes, byte[] commentBytes) {
            name = nameBytes;
            comment = commentBytes;
        }
    }
 }
--- a/src/main/org/apache/tools/zip/ZipOutputStream.java
+++ b/src/main/org/apache/tools/zip/ZipOutputStream.java
@@ -260,6 +260,11 @@ public class ZipOutputStream extends FilterOutputStream {
     */
    private boolean useEFS = true; 

    /**
     * whether to create UnicodePathExtraField-s for each entry.
     */
    private boolean createUnicodeExtraFields = false;

    /**
     * Creates a new ZIP OutputStream filtering the underlying stream.
     * @param out the outputstream to zip
@@ -335,14 +340,24 @@ public class ZipOutputStream extends FilterOutputStream {
    }

    /**
     * Whether to set the EFS flag if the file name encoding is UTF-8.
     * Whether to set the language encoding flag if the file name
     * encoding is UTF-8.
     *
     * <p>Defaults to true.</p>
     */
    public void setUseEFS(boolean b) {
    public void setUseLanguageEncodingFlag(boolean b) {
        useEFS = b && isUTF8(encoding);
    }

    /**
     * Whether to create Unicode Extra Fields for all entries.
     *
     * <p>Defaults to false.</p>
     *
     * @param b true to create Unicode extra fields when entries are
     * written, false to write entries without them
     */
    public void setCreateUnicodeExtraFields(boolean b) {
        createUnicodeExtraFields = b;
    }

    /**
     * Finishes writing the contents and closes this as well as the
     * underlying stream.
@@ -638,6 +653,17 @@ public class ZipOutputStream extends FilterOutputStream {
     * @since 1.1
     */
    protected void writeLocalFileHeader(ZipEntry ze) throws IOException {

        byte[] name = getBytes(ze.getName());
        if (createUnicodeExtraFields) {
            ze.addExtraField(new UnicodePathExtraField(ze.getName(), name));
            String comm = ze.getComment();
            if (comm != null && !"".equals(comm)) {
                byte[] commentB = getBytes(comm);
                ze.addExtraField(new UnicodeCommentExtraField(comm, commentB));
            }
        }

        offsets.put(ze, ZipLong.getBytes(written));

        writeOut(LFH_SIG);
@@ -675,7 +701,6 @@ public class ZipOutputStream extends FilterOutputStream {
        // CheckStyle:MagicNumber ON

        // file name length
        byte[] name = getBytes(ze.getName());
        writeOut(ZipShort.getBytes(name.length));
        written += SHORT;