git-svn-id: https://svn.apache.org/repos/asf/ant/core/trunk@745531 13f79535-47bb-0310-9956-ffa450edef68master
@@ -348,6 +348,9 @@ Fixed bugs: | |||
VM was running) for files with an unexpected internal structure. | |||
Bugzilla Report 46559. | |||
* The zip package now supports the extra fields invented by InfoZIP | |||
in order to store Unicode file names and comments. | |||
Other changes: | |||
-------------- | |||
* A HostInfo task was added performing information on hosts, including info on | |||
@@ -0,0 +1,147 @@ | |||
/* | |||
* Licensed to the Apache Software Foundation (ASF) under one or more | |||
* contributor license agreements. See the NOTICE file distributed with | |||
* this work for additional information regarding copyright ownership. | |||
* The ASF licenses this file to You under the Apache License, Version 2.0 | |||
* (the "License"); you may not use this file except in compliance with | |||
* the License. You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
* | |||
*/ | |||
package org.apache.tools.zip; | |||
import java.io.UnsupportedEncodingException; | |||
import java.util.zip.CRC32; | |||
import java.util.zip.ZipException; | |||
/** | |||
* A common base class for Unicode extra information extra fields. | |||
*/ | |||
public abstract class AbstractUnicodeExtraField implements ZipExtraField { | |||
private long nameCRC32; | |||
private byte[] unicodeName; | |||
private byte[] data; | |||
protected AbstractUnicodeExtraField() { | |||
} | |||
/** | |||
* Assemble as unicode path extension form the name and encoding | |||
* of the orginal zip entry. | |||
* | |||
* @param name The file name or comment. | |||
* @param zipEncoding The encoding of the filenames in the zip | |||
* file, usually <code>"CP437"</code>. | |||
*/ | |||
protected AbstractUnicodeExtraField(String name, String zipEncoding) { | |||
byte[] filename = ZipEncodingHelper.encodeName(name, zipEncoding); | |||
CRC32 crc32 = new CRC32(); | |||
crc32.update(filename); | |||
nameCRC32 = crc32.getValue(); | |||
try { | |||
unicodeName = name.getBytes("UTF-8"); | |||
} catch (UnsupportedEncodingException e) { | |||
throw new RuntimeException("FATAL: UTF-8 encoding not supported.", | |||
e); | |||
} | |||
} | |||
private void assembleData() { | |||
if (unicodeName == null) { | |||
return; | |||
} | |||
data = new byte[5 + unicodeName.length]; | |||
// version 1 | |||
data[0] = 0x01; | |||
System.arraycopy(ZipLong.getBytes(nameCRC32), 0, data, 1, 4); | |||
System.arraycopy(unicodeName, 0, data, 5, unicodeName.length); | |||
} | |||
/** | |||
* @return The CRC32 checksum of the filename or comment as | |||
* encoded in the central directory of the zip file. | |||
*/ | |||
public long getNameCRC32() { | |||
return nameCRC32; | |||
} | |||
/** | |||
* @param nameCRC32 The CRC32 checksum of the filename as encoded | |||
* in the central directory of the zip file to set. | |||
*/ | |||
public void setNameCRC32(long nameCRC32) { | |||
nameCRC32 = nameCRC32; | |||
data = null; | |||
} | |||
/** | |||
* @return The utf-8 encoded name. | |||
*/ | |||
public byte[] getUnicodeName() { | |||
return unicodeName; | |||
} | |||
/** | |||
* @param unicodeName The utf-8 encoded name to set. | |||
*/ | |||
public void setUnicodeName(byte[] unicodeName) { | |||
unicodeName = unicodeName; | |||
data = null; | |||
} | |||
public byte[] getCentralDirectoryData() { | |||
if (data == null) { | |||
this.assembleData(); | |||
} | |||
return data; | |||
} | |||
public ZipShort getCentralDirectoryLength() { | |||
if (data == null) { | |||
assembleData(); | |||
} | |||
return new ZipShort(data.length); | |||
} | |||
public byte[] getLocalFileDataData() { | |||
return getCentralDirectoryData(); | |||
} | |||
public ZipShort getLocalFileDataLength() { | |||
return getCentralDirectoryLength(); | |||
} | |||
public void parseFromLocalFileData(byte[] buffer, int offset, int length) | |||
throws ZipException { | |||
if (length < 5) { | |||
throw new ZipException("UniCode path extra data must have at least" | |||
+ " 5 bytes."); | |||
} | |||
int version = buffer[offset]; | |||
if (version != 0x01) { | |||
throw new ZipException("Unsupported version [" + version | |||
+ "] for UniCode path extra data."); | |||
} | |||
nameCRC32 = ZipLong.getValue(buffer, offset + 1); | |||
unicodeName = new byte[length - 5]; | |||
System.arraycopy(buffer, offset + 5, unicodeName, 0, length - 5); | |||
data = null; | |||
} | |||
} |
@@ -44,6 +44,8 @@ public class ExtraFieldUtils { | |||
implementations = new HashMap(); | |||
register(AsiExtraField.class); | |||
register(JarMarker.class); | |||
register(UnicodePathExtraField.class); | |||
register(UnicodeCommentExtraField.class); | |||
} | |||
/** | |||
@@ -0,0 +1,60 @@ | |||
/* | |||
* Licensed to the Apache Software Foundation (ASF) under one or more | |||
* contributor license agreements. See the NOTICE file distributed with | |||
* this work for additional information regarding copyright ownership. | |||
* The ASF licenses this file to You under the Apache License, Version 2.0 | |||
* (the "License"); you may not use this file except in compliance with | |||
* the License. You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
* | |||
*/ | |||
package org.apache.tools.zip; | |||
/** | |||
* Info-ZIP Unicode Comment Extra Field (0x6375): | |||
* | |||
* <p>Stores the UTF-8 version of the file comment as stored in the | |||
* central directory header.</p> | |||
* | |||
* <pre> | |||
* Value Size Description | |||
* ----- ---- ----------- | |||
* (UCom) 0x6375 Short tag for this extra block type ("uc") | |||
* TSize Short total data size for this block | |||
* Version 1 byte version of this extra field, currently 1 | |||
* ComCRC32 4 bytes Comment Field CRC32 Checksum | |||
* UnicodeCom Variable UTF-8 version of the entry comment | |||
* </pre> | |||
*/ | |||
public class UnicodeCommentExtraField extends AbstractUnicodeExtraField { | |||
public static final ZipShort UCOM_ID = new ZipShort(0x6375); | |||
public UnicodeCommentExtraField () { | |||
} | |||
/** | |||
* Assemble as unicode comment extension form the comment and | |||
* encoding of the orginal zip entry. | |||
* | |||
* @param name The file name | |||
* @param zipEncoding The encoding of the comment in the zip file, | |||
* usually <code>"CP437"</code>. | |||
*/ | |||
public UnicodeCommentExtraField(String name, String zipEncoding) { | |||
super(name, zipEncoding); | |||
} | |||
public ZipShort getHeaderId() { | |||
return UCOM_ID; | |||
} | |||
} |
@@ -0,0 +1,59 @@ | |||
/* | |||
* Licensed to the Apache Software Foundation (ASF) under one or more | |||
* contributor license agreements. See the NOTICE file distributed with | |||
* this work for additional information regarding copyright ownership. | |||
* The ASF licenses this file to You under the Apache License, Version 2.0 | |||
* (the "License"); you may not use this file except in compliance with | |||
* the License. You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
* | |||
*/ | |||
package org.apache.tools.zip; | |||
/** | |||
* Info-ZIP Unicode Path Extra Field (0x7075): | |||
* | |||
* <p>Stores the UTF-8 version of the file name field as stored in the | |||
* local header and central directory header.</p> | |||
* | |||
* <pre> | |||
* Value Size Description | |||
* ----- ---- ----------- | |||
* (UPath) 0x7075 Short tag for this extra block type ("up") | |||
* TSize Short total data size for this block | |||
* Version 1 byte version of this extra field, currently 1 | |||
* NameCRC32 4 bytes File Name Field CRC32 Checksum | |||
* UnicodeName Variable UTF-8 version of the entry File Name | |||
* </pre> | |||
*/ | |||
public class UnicodePathExtraField extends AbstractUnicodeExtraField { | |||
public static final ZipShort UPATH_ID = new ZipShort(0x7075); | |||
public UnicodePathExtraField () { | |||
} | |||
/** | |||
* Assemble as unicode path extension form the name and encoding | |||
* of the orginal zip entry. | |||
* | |||
* @param name The file name | |||
* @param zipEncoding The encoding of the filename in the zip | |||
* file, usually <code>"CP437"</code>. | |||
*/ | |||
public UnicodePathExtraField(String name, String zipEncoding) { | |||
super(name, zipEncoding); | |||
} | |||
public ZipShort getHeaderId() { | |||
return UPATH_ID; | |||
} | |||
} |
@@ -0,0 +1,171 @@ | |||
/* | |||
* Licensed to the Apache Software Foundation (ASF) under one or more | |||
* contributor license agreements. See the NOTICE file distributed with | |||
* this work for additional information regarding copyright ownership. | |||
* The ASF licenses this file to You under the Apache License, Version 2.0 | |||
* (the "License"); you may not use this file except in compliance with | |||
* the License. You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
* | |||
*/ | |||
package org.apache.tools.zip; | |||
import java.nio.ByteBuffer; | |||
import java.nio.CharBuffer; | |||
import java.nio.charset.Charset; | |||
import java.nio.charset.CharsetEncoder; | |||
import java.nio.charset.CoderResult; | |||
import java.nio.charset.CodingErrorAction; | |||
/**
 * Package-private static utilities for robustly encoding file names and
 * comments that are written to zip files.
 */
abstract class ZipEncodingHelper {

    /**
     * ASCII codes of the hexadecimal digits <code>0,...,9,A,...,F</code>,
     * indexed by digit value.
     */
    private static final byte[] HEX_DIGITS = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Returns a larger buffer holding the bytes written to <code>b</code>
     * so far.
     *
     * <p>As a side effect <code>b</code> is flipped and then drained by
     * the copy.</p>
     *
     * @param b The original buffer.
     * @param newCapacity The minimal requested new capacity.
     * @return A byte buffer <code>r</code> with
     *   <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> whose
     *   leading bytes are the content of <code>b</code> and whose position
     *   is just behind the copied bytes.
     */
    static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
        // limit := position, position := 0, i.e. expose what was written.
        b.flip();

        int doubled = b.capacity() * 2;
        ByteBuffer grown = ByteBuffer.allocate(Math.max(doubled, newCapacity));
        grown.put(b);
        return grown;
    }

    /**
     * Appends the six ASCII bytes <code>%Uxxxx</code> for one utf-16
     * code unit, where <code>xxxx</code> are upper-case hex digits.
     *
     * @param target Buffer to append to; must have six bytes remaining.
     * @param c The utf-16 code unit to escape.
     */
    private static void appendEscaped(ByteBuffer target, char c) {
        target.put((byte) '%');
        target.put((byte) 'U');
        // most significant nibble first
        for (int shift = 12; shift >= 0; shift -= 4) {
            target.put(HEX_DIGITS[(c >> shift) & 0x0f]);
        }
    }

    /**
     * Encode a filename or a comment to a byte array suitable for
     * storing it to a serialized zip entry.
     *
     * Examples (in pseudo-notation, right hand side is C-style notation):
     * <pre>
     *  encodeName("\u20AC_for_Dollar.txt","CP437") = "%U20AC_for_Dollar.txt"
     *  encodeName("\u00D6lf\u00E4sser.txt","CP437") = "\231lf\204sser.txt"
     * </pre>
     *
     * @param name The filename or comment with possible non-ASCII
     * unicode characters.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     * @return A byte array containing the mapped file
     *         name. Unmappable characters or malformed character
     *         sequences are mapped to a sequence of utf-16 words
     *         encoded in the format <code>%Uxxxx</code>.
     */
    static final byte[] encodeName(String name, String encoding) {
        CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
        // Errors are reported so they can be escaped by hand below.
        encoder.onMalformedInput(CodingErrorAction.REPORT);
        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);

        int charCount = name.length();
        CharBuffer input = CharBuffer.wrap(name);
        ByteBuffer output = ByteBuffer.allocate(charCount + (charCount + 1) / 2);

        while (input.hasRemaining()) {
            CoderResult result = encoder.encode(input, output, true);

            if (result.isUnderflow()) {
                // All input consumed.
                encoder.flush(output);
                break;
            } else if (result.isOverflow()) {
                // Out of room: at least double the buffer and retry.
                output = growBuffer(output, 0);
            } else if (result.isError()) {
                // Malformed or unmappable input: write the offending
                // utf-16 units in pseudo-URL encoding style.
                int badUnits = result.length();
                int needed = badUnits * 6;
                if (needed > output.remaining()) {
                    output = growBuffer(output, output.position() + needed);
                }
                for (int left = badUnits; left > 0; left--) {
                    appendEscaped(output, input.get());
                }
            }
        }

        // Copy out exactly the bytes that were written.
        output.flip();
        byte[] encoded = new byte[output.remaining()];
        output.get(encoded);
        return encoded;
    }

    /**
     * Return, whether a filename or a comment may be encoded to a
     * byte array suitable for storing it to a serialized zip entry
     * without any losses.
     *
     * Examples (in pseudo-notation, right hand side is C-style notation):
     * <pre>
     *  canEncodeName("\u20AC_for_Dollar.txt","CP437") = false
     *  canEncodeName("\u20AC_for_Dollar.txt","UTF-8") = true
     *  canEncodeName("\u00D6lf\u00E4sser.txt","CP437") = true
     * </pre>
     *
     * @param name The filename or comment with possible non-ASCII
     * unicode characters.
     * @param encoding A valid encoding name. The standard zip
     *                 encoding is <code>"CP437"</code>,
     *                 <code>"UTF-8"</code> is supported in ZIP file
     *                 version <code>6.3</code> or later.
     * @return Whether the given encoding may encode the given name.
     */
    static final boolean canEncodeName(String name, String encoding) {
        CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
        encoder.onMalformedInput(CodingErrorAction.REPORT);
        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        return encoder.canEncode(name);
    }
}