git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@745531 13f79535-47bb-0310-9956-ffa450edef68master
| @@ -348,6 +348,9 @@ Fixed bugs: | |||||
| VM was running) for files with an unexpected internal structure. | VM was running) for files with an unexpected internal structure. | ||||
| Bugzilla Report 46559. | Bugzilla Report 46559. | ||||
| * The zip package now supports the extra fields invented by InfoZIP | |||||
| in order to store Unicode file names and comments. | |||||
| Other changes: | Other changes: | ||||
| -------------- | -------------- | ||||
| * A HostInfo task was added performing information on hosts, including info on | * A HostInfo task was added performing information on hosts, including info on | ||||
| @@ -0,0 +1,147 @@ | |||||
| /* | |||||
| * Licensed to the Apache Software Foundation (ASF) under one or more | |||||
| * contributor license agreements. See the NOTICE file distributed with | |||||
| * this work for additional information regarding copyright ownership. | |||||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| * (the "License"); you may not use this file except in compliance with | |||||
| * the License. You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| * | |||||
| */ | |||||
| package org.apache.tools.zip; | |||||
| import java.io.UnsupportedEncodingException; | |||||
| import java.util.zip.CRC32; | |||||
| import java.util.zip.ZipException; | |||||
| /** | |||||
| * A common base class for Unicode extra information extra fields. | |||||
| */ | |||||
| public abstract class AbstractUnicodeExtraField implements ZipExtraField { | |||||
| private long nameCRC32; | |||||
| private byte[] unicodeName; | |||||
| private byte[] data; | |||||
| protected AbstractUnicodeExtraField() { | |||||
| } | |||||
| /** | |||||
| * Assemble as unicode path extension form the name and encoding | |||||
| * of the orginal zip entry. | |||||
| * | |||||
| * @param name The file name or comment. | |||||
| * @param zipEncoding The encoding of the filenames in the zip | |||||
| * file, usually <code>"CP437"</code>. | |||||
| */ | |||||
| protected AbstractUnicodeExtraField(String name, String zipEncoding) { | |||||
| byte[] filename = ZipEncodingHelper.encodeName(name, zipEncoding); | |||||
| CRC32 crc32 = new CRC32(); | |||||
| crc32.update(filename); | |||||
| nameCRC32 = crc32.getValue(); | |||||
| try { | |||||
| unicodeName = name.getBytes("UTF-8"); | |||||
| } catch (UnsupportedEncodingException e) { | |||||
| throw new RuntimeException("FATAL: UTF-8 encoding not supported.", | |||||
| e); | |||||
| } | |||||
| } | |||||
| private void assembleData() { | |||||
| if (unicodeName == null) { | |||||
| return; | |||||
| } | |||||
| data = new byte[5 + unicodeName.length]; | |||||
| // version 1 | |||||
| data[0] = 0x01; | |||||
| System.arraycopy(ZipLong.getBytes(nameCRC32), 0, data, 1, 4); | |||||
| System.arraycopy(unicodeName, 0, data, 5, unicodeName.length); | |||||
| } | |||||
| /** | |||||
| * @return The CRC32 checksum of the filename or comment as | |||||
| * encoded in the central directory of the zip file. | |||||
| */ | |||||
| public long getNameCRC32() { | |||||
| return nameCRC32; | |||||
| } | |||||
| /** | |||||
| * @param nameCRC32 The CRC32 checksum of the filename as encoded | |||||
| * in the central directory of the zip file to set. | |||||
| */ | |||||
| public void setNameCRC32(long nameCRC32) { | |||||
| nameCRC32 = nameCRC32; | |||||
| data = null; | |||||
| } | |||||
| /** | |||||
| * @return The utf-8 encoded name. | |||||
| */ | |||||
| public byte[] getUnicodeName() { | |||||
| return unicodeName; | |||||
| } | |||||
| /** | |||||
| * @param unicodeName The utf-8 encoded name to set. | |||||
| */ | |||||
| public void setUnicodeName(byte[] unicodeName) { | |||||
| unicodeName = unicodeName; | |||||
| data = null; | |||||
| } | |||||
| public byte[] getCentralDirectoryData() { | |||||
| if (data == null) { | |||||
| this.assembleData(); | |||||
| } | |||||
| return data; | |||||
| } | |||||
| public ZipShort getCentralDirectoryLength() { | |||||
| if (data == null) { | |||||
| assembleData(); | |||||
| } | |||||
| return new ZipShort(data.length); | |||||
| } | |||||
| public byte[] getLocalFileDataData() { | |||||
| return getCentralDirectoryData(); | |||||
| } | |||||
| public ZipShort getLocalFileDataLength() { | |||||
| return getCentralDirectoryLength(); | |||||
| } | |||||
| public void parseFromLocalFileData(byte[] buffer, int offset, int length) | |||||
| throws ZipException { | |||||
| if (length < 5) { | |||||
| throw new ZipException("UniCode path extra data must have at least" | |||||
| + " 5 bytes."); | |||||
| } | |||||
| int version = buffer[offset]; | |||||
| if (version != 0x01) { | |||||
| throw new ZipException("Unsupported version [" + version | |||||
| + "] for UniCode path extra data."); | |||||
| } | |||||
| nameCRC32 = ZipLong.getValue(buffer, offset + 1); | |||||
| unicodeName = new byte[length - 5]; | |||||
| System.arraycopy(buffer, offset + 5, unicodeName, 0, length - 5); | |||||
| data = null; | |||||
| } | |||||
| } | |||||
| @@ -44,6 +44,8 @@ public class ExtraFieldUtils { | |||||
| implementations = new HashMap(); | implementations = new HashMap(); | ||||
| register(AsiExtraField.class); | register(AsiExtraField.class); | ||||
| register(JarMarker.class); | register(JarMarker.class); | ||||
| register(UnicodePathExtraField.class); | |||||
| register(UnicodeCommentExtraField.class); | |||||
| } | } | ||||
| /** | /** | ||||
| @@ -0,0 +1,60 @@ | |||||
| /* | |||||
| * Licensed to the Apache Software Foundation (ASF) under one or more | |||||
| * contributor license agreements. See the NOTICE file distributed with | |||||
| * this work for additional information regarding copyright ownership. | |||||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| * (the "License"); you may not use this file except in compliance with | |||||
| * the License. You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| * | |||||
| */ | |||||
| package org.apache.tools.zip; | |||||
| /** | |||||
| * Info-ZIP Unicode Comment Extra Field (0x6375): | |||||
| * | |||||
| * <p>Stores the UTF-8 version of the file comment as stored in the | |||||
| * central directory header.</p> | |||||
| * | |||||
| * <pre> | |||||
| * Value Size Description | |||||
| * ----- ---- ----------- | |||||
| * (UCom) 0x6375 Short tag for this extra block type ("uc") | |||||
| * TSize Short total data size for this block | |||||
| * Version 1 byte version of this extra field, currently 1 | |||||
| * ComCRC32 4 bytes Comment Field CRC32 Checksum | |||||
| * UnicodeCom Variable UTF-8 version of the entry comment | |||||
| * </pre> | |||||
| */ | |||||
| public class UnicodeCommentExtraField extends AbstractUnicodeExtraField { | |||||
| public static final ZipShort UCOM_ID = new ZipShort(0x6375); | |||||
| public UnicodeCommentExtraField () { | |||||
| } | |||||
| /** | |||||
| * Assemble as unicode comment extension form the comment and | |||||
| * encoding of the orginal zip entry. | |||||
| * | |||||
| * @param name The file name | |||||
| * @param zipEncoding The encoding of the comment in the zip file, | |||||
| * usually <code>"CP437"</code>. | |||||
| */ | |||||
| public UnicodeCommentExtraField(String name, String zipEncoding) { | |||||
| super(name, zipEncoding); | |||||
| } | |||||
| public ZipShort getHeaderId() { | |||||
| return UCOM_ID; | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,59 @@ | |||||
| /* | |||||
| * Licensed to the Apache Software Foundation (ASF) under one or more | |||||
| * contributor license agreements. See the NOTICE file distributed with | |||||
| * this work for additional information regarding copyright ownership. | |||||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| * (the "License"); you may not use this file except in compliance with | |||||
| * the License. You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| * | |||||
| */ | |||||
| package org.apache.tools.zip; | |||||
| /** | |||||
| * Info-ZIP Unicode Path Extra Field (0x7075): | |||||
| * | |||||
| * <p>Stores the UTF-8 version of the file name field as stored in the | |||||
| * local header and central directory header.</p> | |||||
| * | |||||
| * <pre> | |||||
| * Value Size Description | |||||
| * ----- ---- ----------- | |||||
| * (UPath) 0x7075 Short tag for this extra block type ("up") | |||||
| * TSize Short total data size for this block | |||||
| * Version 1 byte version of this extra field, currently 1 | |||||
| * NameCRC32 4 bytes File Name Field CRC32 Checksum | |||||
| * UnicodeName Variable UTF-8 version of the entry File Name | |||||
| * </pre> | |||||
| */ | |||||
| public class UnicodePathExtraField extends AbstractUnicodeExtraField { | |||||
| public static final ZipShort UPATH_ID = new ZipShort(0x7075); | |||||
| public UnicodePathExtraField () { | |||||
| } | |||||
| /** | |||||
| * Assemble as unicode path extension form the name and encoding | |||||
| * of the orginal zip entry. | |||||
| * | |||||
| * @param name The file name | |||||
| * @param zipEncoding The encoding of the filename in the zip | |||||
| * file, usually <code>"CP437"</code>. | |||||
| */ | |||||
| public UnicodePathExtraField(String name, String zipEncoding) { | |||||
| super(name, zipEncoding); | |||||
| } | |||||
| public ZipShort getHeaderId() { | |||||
| return UPATH_ID; | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,171 @@ | |||||
| /* | |||||
| * Licensed to the Apache Software Foundation (ASF) under one or more | |||||
| * contributor license agreements. See the NOTICE file distributed with | |||||
| * this work for additional information regarding copyright ownership. | |||||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
| * (the "License"); you may not use this file except in compliance with | |||||
| * the License. You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| * | |||||
| */ | |||||
| package org.apache.tools.zip; | |||||
| import java.nio.ByteBuffer; | |||||
| import java.nio.CharBuffer; | |||||
| import java.nio.charset.Charset; | |||||
| import java.nio.charset.CharsetEncoder; | |||||
| import java.nio.charset.CoderResult; | |||||
| import java.nio.charset.CodingErrorAction; | |||||
/**
 * Static helper functions for robustly encoding filenames in zip files.
 */
abstract class ZipEncodingHelper {

    /**
     * Number of bytes written for one escaped char
     * (<code>%Uxxxx</code>).
     */
    private static final int ESCAPE_LENGTH = 6;

    /**
     * Grow a byte buffer, so it has a minimal capacity or at least
     * the double capacity of the original buffer
     *
     * <p>As a side effect the limit of <code>b</code> is set to its
     * former position and its position is reset before the content
     * is copied.</p>
     *
     * @param b The original buffer.
     * @param newCapacity The minimal requested new capacity.
     * @return A byte buffer <code>r</code> with
     *         <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
     *         all the data contained in <code>b</code> copied to the beginning
     *         of <code>r</code>.
     *
     */
    static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
        b.limit(b.position());
        b.rewind();

        int c2 = b.capacity() * 2;
        ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);

        on.put(b);
        return on;
    }

    /**
     * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
     * ASCII bytes.
     */
    private static final byte[] HEX_DIGITS =
        new byte [] {
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
        0x42, 0x43, 0x44, 0x45, 0x46
    };

    /**
     * Encode a filename or a comment to a byte array suitable for
     * storing it to a serialized zip entry.
     *
     * Examples (in pseudo-notation, right hand side is C-style notation):
     * <pre>
     *  encodeName("\u20AC_for_Dollar.txt","CP437") = "%U20AC_for_Dollar.txt"
     *  encodeName("\u00D6lf\u00E4sser.txt","CP437") = "\231lf\204sser.txt"
     * </pre>
     *
     * @param name The filename or comment with possible non-ASCII
     * unicode characters.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     * @return A byte array containing the mapped file
     *         name. Unmappable characters or malformed character
     *         sequences are mapped to a sequence of utf-16 words
     *         encoded in the format <code>%Uxxxx</code>.
     */
    static final byte[] encodeName(String name, String encoding) {
        Charset cs = Charset.forName(encoding);
        CharsetEncoder enc = cs.newEncoder();

        enc.onMalformedInput(CodingErrorAction.REPORT);
        enc.onUnmappableCharacter(CodingErrorAction.REPORT);

        CharBuffer cb = CharBuffer.wrap(name);
        ByteBuffer out = ByteBuffer.allocate(name.length()
                                             + (name.length() + 1) / 2);

        // flush() may only be called once encode() has been invoked
        // with endOfInput == true, so remember whether that happened.
        boolean encoderUsed = false;

        while (cb.remaining() > 0) {
            CoderResult res = enc.encode(cb, out, true);
            encoderUsed = true;

            if (res.isUnmappable() || res.isMalformed()) {

                // write the unmappable characters in utf-16
                // pseudo-URL encoding style to ByteBuffer.
                int needed = res.length() * ESCAPE_LENGTH;
                if (needed > out.remaining()) {
                    out = growBuffer(out, out.position() + needed);
                }

                for (int i = 0; i < res.length(); ++i) {
                    char c = cb.get();

                    out.put((byte) '%');
                    out.put((byte) 'U');
                    out.put(HEX_DIGITS[(c >> 12) & 0x0f]);
                    out.put(HEX_DIGITS[(c >> 8) & 0x0f]);
                    out.put(HEX_DIGITS[(c >> 4) & 0x0f]);
                    out.put(HEX_DIGITS[c & 0x0f]);
                }

            } else if (res.isOverflow()) {

                out = growBuffer(out, 0);

            } else if (res.isUnderflow()) {

                // all input has been consumed
                break;
            }
        }

        // Flush after the loop so that it also happens when the input
        // ends with an escaped character, and retry with a larger
        // buffer instead of dropping bytes when flushing overflows.
        if (encoderUsed) {
            while (enc.flush(out).isOverflow()) {
                out = growBuffer(out, 0);
            }
        }

        byte [] ret = new byte[out.position()];
        out.rewind();
        out.get(ret);
        return ret;
    }

    /**
     * Return, whether a filename or a comment may be encoded to a
     * byte array suitable for storing it to a serialized zip entry
     * without any losses.
     *
     * Examples (in pseudo-notation, right hand side is C-style notation):
     * <pre>
     *  canEncodeName("\u20AC_for_Dollar.txt","CP437") = false
     *  canEncodeName("\u20AC_for_Dollar.txt","UTF-8") = true
     *  canEncodeName("\u00D6lf\u00E4sser.txt","CP437") = true
     * </pre>
     *
     * @param name The filename or comment with possible non-ASCII
     * unicode characters.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     * @return Whether the given encoding may encode the given name.
     */
    static final boolean canEncodeName(String name, String encoding) {

        Charset cs = Charset.forName(encoding);

        CharsetEncoder enc = cs.newEncoder();
        enc.onMalformedInput(CodingErrorAction.REPORT);
        enc.onUnmappableCharacter(CodingErrorAction.REPORT);

        return enc.canEncode(name);
    }
}