git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@745531 13f79535-47bb-0310-9956-ffa450edef68master
@@ -348,6 +348,9 @@ Fixed bugs: | |||||
VM was running) for files with an unexpected internal structure. | VM was running) for files with an unexpected internal structure. | ||||
Bugzilla Report 46559. | Bugzilla Report 46559. | ||||
* The zip package now supports the extra fields invented by InfoZIP | |||||
in order to store Unicode file names and comments. | |||||
Other changes: | Other changes: | ||||
-------------- | -------------- | ||||
* A HostInfo task was added performing information on hosts, including info on | * A HostInfo task was added performing information on hosts, including info on | ||||
@@ -0,0 +1,147 @@ | |||||
/* | |||||
* Licensed to the Apache Software Foundation (ASF) under one or more | |||||
* contributor license agreements. See the NOTICE file distributed with | |||||
* this work for additional information regarding copyright ownership. | |||||
* The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
* (the "License"); you may not use this file except in compliance with | |||||
* the License. You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
* | |||||
*/ | |||||
package org.apache.tools.zip; | |||||
import java.io.UnsupportedEncodingException; | |||||
import java.util.zip.CRC32; | |||||
import java.util.zip.ZipException; | |||||
/** | |||||
* A common base class for Unicode extra information extra fields. | |||||
*/ | |||||
public abstract class AbstractUnicodeExtraField implements ZipExtraField { | |||||
private long nameCRC32; | |||||
private byte[] unicodeName; | |||||
private byte[] data; | |||||
protected AbstractUnicodeExtraField() { | |||||
} | |||||
/** | |||||
* Assemble as unicode path extension form the name and encoding | |||||
* of the orginal zip entry. | |||||
* | |||||
* @param name The file name or comment. | |||||
* @param zipEncoding The encoding of the filenames in the zip | |||||
* file, usually <code>"CP437"</code>. | |||||
*/ | |||||
protected AbstractUnicodeExtraField(String name, String zipEncoding) { | |||||
byte[] filename = ZipEncodingHelper.encodeName(name, zipEncoding); | |||||
CRC32 crc32 = new CRC32(); | |||||
crc32.update(filename); | |||||
nameCRC32 = crc32.getValue(); | |||||
try { | |||||
unicodeName = name.getBytes("UTF-8"); | |||||
} catch (UnsupportedEncodingException e) { | |||||
throw new RuntimeException("FATAL: UTF-8 encoding not supported.", | |||||
e); | |||||
} | |||||
} | |||||
private void assembleData() { | |||||
if (unicodeName == null) { | |||||
return; | |||||
} | |||||
data = new byte[5 + unicodeName.length]; | |||||
// version 1 | |||||
data[0] = 0x01; | |||||
System.arraycopy(ZipLong.getBytes(nameCRC32), 0, data, 1, 4); | |||||
System.arraycopy(unicodeName, 0, data, 5, unicodeName.length); | |||||
} | |||||
/** | |||||
* @return The CRC32 checksum of the filename or comment as | |||||
* encoded in the central directory of the zip file. | |||||
*/ | |||||
public long getNameCRC32() { | |||||
return nameCRC32; | |||||
} | |||||
/** | |||||
* @param nameCRC32 The CRC32 checksum of the filename as encoded | |||||
* in the central directory of the zip file to set. | |||||
*/ | |||||
public void setNameCRC32(long nameCRC32) { | |||||
nameCRC32 = nameCRC32; | |||||
data = null; | |||||
} | |||||
/** | |||||
* @return The utf-8 encoded name. | |||||
*/ | |||||
public byte[] getUnicodeName() { | |||||
return unicodeName; | |||||
} | |||||
/** | |||||
* @param unicodeName The utf-8 encoded name to set. | |||||
*/ | |||||
public void setUnicodeName(byte[] unicodeName) { | |||||
unicodeName = unicodeName; | |||||
data = null; | |||||
} | |||||
public byte[] getCentralDirectoryData() { | |||||
if (data == null) { | |||||
this.assembleData(); | |||||
} | |||||
return data; | |||||
} | |||||
public ZipShort getCentralDirectoryLength() { | |||||
if (data == null) { | |||||
assembleData(); | |||||
} | |||||
return new ZipShort(data.length); | |||||
} | |||||
public byte[] getLocalFileDataData() { | |||||
return getCentralDirectoryData(); | |||||
} | |||||
public ZipShort getLocalFileDataLength() { | |||||
return getCentralDirectoryLength(); | |||||
} | |||||
public void parseFromLocalFileData(byte[] buffer, int offset, int length) | |||||
throws ZipException { | |||||
if (length < 5) { | |||||
throw new ZipException("UniCode path extra data must have at least" | |||||
+ " 5 bytes."); | |||||
} | |||||
int version = buffer[offset]; | |||||
if (version != 0x01) { | |||||
throw new ZipException("Unsupported version [" + version | |||||
+ "] for UniCode path extra data."); | |||||
} | |||||
nameCRC32 = ZipLong.getValue(buffer, offset + 1); | |||||
unicodeName = new byte[length - 5]; | |||||
System.arraycopy(buffer, offset + 5, unicodeName, 0, length - 5); | |||||
data = null; | |||||
} | |||||
} |
@@ -44,6 +44,8 @@ public class ExtraFieldUtils { | |||||
implementations = new HashMap(); | implementations = new HashMap(); | ||||
register(AsiExtraField.class); | register(AsiExtraField.class); | ||||
register(JarMarker.class); | register(JarMarker.class); | ||||
register(UnicodePathExtraField.class); | |||||
register(UnicodeCommentExtraField.class); | |||||
} | } | ||||
/** | /** | ||||
@@ -0,0 +1,60 @@ | |||||
/* | |||||
* Licensed to the Apache Software Foundation (ASF) under one or more | |||||
* contributor license agreements. See the NOTICE file distributed with | |||||
* this work for additional information regarding copyright ownership. | |||||
* The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
* (the "License"); you may not use this file except in compliance with | |||||
* the License. You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
* | |||||
*/ | |||||
package org.apache.tools.zip; | |||||
/** | |||||
* Info-ZIP Unicode Comment Extra Field (0x6375): | |||||
* | |||||
* <p>Stores the UTF-8 version of the file comment as stored in the | |||||
* central directory header.</p> | |||||
* | |||||
* <pre> | |||||
* Value Size Description | |||||
* ----- ---- ----------- | |||||
* (UCom) 0x6375 Short tag for this extra block type ("uc") | |||||
* TSize Short total data size for this block | |||||
* Version 1 byte version of this extra field, currently 1 | |||||
* ComCRC32 4 bytes Comment Field CRC32 Checksum | |||||
* UnicodeCom Variable UTF-8 version of the entry comment | |||||
* </pre> | |||||
*/ | |||||
public class UnicodeCommentExtraField extends AbstractUnicodeExtraField { | |||||
public static final ZipShort UCOM_ID = new ZipShort(0x6375); | |||||
public UnicodeCommentExtraField () { | |||||
} | |||||
/** | |||||
* Assemble as unicode comment extension form the comment and | |||||
* encoding of the orginal zip entry. | |||||
* | |||||
* @param name The file name | |||||
* @param zipEncoding The encoding of the comment in the zip file, | |||||
* usually <code>"CP437"</code>. | |||||
*/ | |||||
public UnicodeCommentExtraField(String name, String zipEncoding) { | |||||
super(name, zipEncoding); | |||||
} | |||||
public ZipShort getHeaderId() { | |||||
return UCOM_ID; | |||||
} | |||||
} |
@@ -0,0 +1,59 @@ | |||||
/* | |||||
* Licensed to the Apache Software Foundation (ASF) under one or more | |||||
* contributor license agreements. See the NOTICE file distributed with | |||||
* this work for additional information regarding copyright ownership. | |||||
* The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
* (the "License"); you may not use this file except in compliance with | |||||
* the License. You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
* | |||||
*/ | |||||
package org.apache.tools.zip; | |||||
/** | |||||
* Info-ZIP Unicode Path Extra Field (0x7075): | |||||
* | |||||
* <p>Stores the UTF-8 version of the file name field as stored in the | |||||
* local header and central directory header.</p> | |||||
* | |||||
* <pre> | |||||
* Value Size Description | |||||
* ----- ---- ----------- | |||||
* (UPath) 0x7075 Short tag for this extra block type ("up") | |||||
* TSize Short total data size for this block | |||||
* Version 1 byte version of this extra field, currently 1 | |||||
* NameCRC32 4 bytes File Name Field CRC32 Checksum | |||||
* UnicodeName Variable UTF-8 version of the entry File Name | |||||
* </pre> | |||||
*/ | |||||
public class UnicodePathExtraField extends AbstractUnicodeExtraField { | |||||
public static final ZipShort UPATH_ID = new ZipShort(0x7075); | |||||
public UnicodePathExtraField () { | |||||
} | |||||
/** | |||||
* Assemble as unicode path extension form the name and encoding | |||||
* of the orginal zip entry. | |||||
* | |||||
* @param name The file name | |||||
* @param zipEncoding The encoding of the filename in the zip | |||||
* file, usually <code>"CP437"</code>. | |||||
*/ | |||||
public UnicodePathExtraField(String name, String zipEncoding) { | |||||
super(name, zipEncoding); | |||||
} | |||||
public ZipShort getHeaderId() { | |||||
return UPATH_ID; | |||||
} | |||||
} |
@@ -0,0 +1,171 @@ | |||||
/* | |||||
* Licensed to the Apache Software Foundation (ASF) under one or more | |||||
* contributor license agreements. See the NOTICE file distributed with | |||||
* this work for additional information regarding copyright ownership. | |||||
* The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
* (the "License"); you may not use this file except in compliance with | |||||
* the License. You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
* | |||||
*/ | |||||
package org.apache.tools.zip; | |||||
import java.nio.ByteBuffer; | |||||
import java.nio.CharBuffer; | |||||
import java.nio.charset.Charset; | |||||
import java.nio.charset.CharsetEncoder; | |||||
import java.nio.charset.CoderResult; | |||||
import java.nio.charset.CodingErrorAction; | |||||
/** | |||||
* Static helper functions for robustly encoding filenames in zip files. | |||||
*/ | |||||
abstract class ZipEncodingHelper {

    /**
     * Grow a byte buffer, so it has a minimal capacity or at least
     * the double capacity of the original buffer.
     *
     * <p>Note that the limit and position of <code>b</code> are
     * modified: after the call <code>b</code> is fully drained.</p>
     *
     * @param b The original buffer.
     * @param newCapacity The minimal requested new capacity.
     * @return A byte buffer <code>r</code> with
     *         <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
     *         all the data contained in <code>b</code> copied to the beginning
     *         of <code>r</code>.
     */
    static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
        // Expose exactly the bytes written so far for copying.
        b.limit(b.position());
        b.rewind();

        int c2 = b.capacity() * 2;
        ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);

        on.put(b);
        return on;
    }

    /**
     * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
     * ASCII bytes.
     */
    private static final byte[] HEX_DIGITS =
        new byte [] {
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
        0x42, 0x43, 0x44, 0x45, 0x46
    };

    /**
     * Encode a filename or a comment to a byte array suitable for
     * storing it to a serialized zip entry.
     *
     * Examples (in pseudo-notation, right hand side is C-style notation):
     * <pre>
     *  encodeName("\u20AC_for_Dollar.txt","CP437") = "%U20AC_for_Dollar.txt"
     *  encodeName("\u00D6lf\u00E4sser.txt","CP437") = "\231lf\204sser.txt"
     * </pre>
     *
     * @param name The filename or comment with possible non-ASCII
     *             unicode characters.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     * @return A byte array containing the mapped file
     *         name. Unmappable characters or malformed character
     *         sequences are mapped to a sequence of utf-16 words
     *         encoded in the format <code>%Uxxxx</code>.
     */
    static final byte[] encodeName(String name, String encoding) {
        Charset cs = Charset.forName(encoding);
        CharsetEncoder enc = cs.newEncoder();

        enc.onMalformedInput(CodingErrorAction.REPORT);
        enc.onUnmappableCharacter(CodingErrorAction.REPORT);

        CharBuffer cb = CharBuffer.wrap(name);
        ByteBuffer out = ByteBuffer.allocate(name.length()
                                             + (name.length() + 1) / 2);

        // Always run the encoder until it reports underflow, i.e. until
        // all input has been consumed; this also covers names ending in
        // an unmappable character and leaves the encoder in a state in
        // which flush() is legal.
        while (true) {
            CoderResult res = enc.encode(cb, out, true);

            if (res.isUnderflow()) {
                break;
            }

            if (res.isOverflow()) {
                // "+ 1" guarantees progress even for a zero capacity buffer.
                out = growBuffer(out, out.capacity() + 1);
                continue;
            }

            // Unmappable or malformed input: write the offending
            // characters in utf-16 pseudo-URL encoding style (%Uxxxx,
            // six bytes per char) to the ByteBuffer.
            int escapedLength = res.length() * 6;
            if (escapedLength > out.remaining()) {
                out = growBuffer(out, out.position() + escapedLength);
            }

            for (int i = 0; i < res.length(); ++i) {
                char c = cb.get();

                out.put((byte) '%');
                out.put((byte) 'U');
                out.put(HEX_DIGITS[(c >> 12) & 0x0f]);
                out.put(HEX_DIGITS[(c >> 8) & 0x0f]);
                out.put(HEX_DIGITS[(c >> 4) & 0x0f]);
                out.put(HEX_DIGITS[c & 0x0f]);
            }
        }

        // Stateful encoders may need to emit a closing sequence; retry
        // with a larger buffer instead of silently dropping it.
        while (enc.flush(out).isOverflow()) {
            out = growBuffer(out, out.capacity() + 1);
        }

        byte [] ret = new byte[out.position()];
        out.rewind();
        out.get(ret);
        return ret;
    }

    /**
     * Return, whether a filename or a comment may be encoded to a
     * byte array suitable for storing it to a serialized zip entry
     * without any losses.
     *
     * Examples (in pseudo-notation, right hand side is C-style notation):
     * <pre>
     *  canEncodeName("\u20AC_for_Dollar.txt","CP437") = false
     *  canEncodeName("\u20AC_for_Dollar.txt","UTF-8") = true
     *  canEncodeName("\u00D6lf\u00E4sser.txt","CP437") = true
     * </pre>
     *
     * @param name The filename or comment with possible non-ASCII
     *             unicode characters.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     * @return Whether the given encoding may encode the given name.
     */
    static final boolean canEncodeName(String name, String encoding) {
        Charset cs = Charset.forName(encoding);
        CharsetEncoder enc = cs.newEncoder();

        enc.onMalformedInput(CodingErrorAction.REPORT);
        enc.onUnmappableCharacter(CodingErrorAction.REPORT);

        return enc.canEncode(name);
    }
}