You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

ZipEncodingHelper.java 9.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package org.apache.tools.zip;
  19. import java.nio.ByteBuffer;
  20. import java.nio.charset.Charset;
  21. import java.nio.charset.UnsupportedCharsetException;
  22. import java.util.HashMap;
  23. import java.util.Locale;
  24. import java.util.Map;
  25. /**
  26. * Static helper functions for robustly encoding filenames in zip files.
  27. */
  28. abstract class ZipEncodingHelper {
  29. /**
  30. * A class, which holds the high characters of a simple encoding
  31. * and lazily instantiates a Simple8BitZipEncoding instance in a
  32. * thread-safe manner.
  33. */
  34. private static class SimpleEncodingHolder {
  35. private final char [] highChars;
  36. private Simple8BitZipEncoding encoding;
  37. /**
  38. * Instantiate a simple encoding holder.
  39. *
  40. * @param highChars The characters for byte codes 128 to 255.
  41. *
  42. * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
  43. */
  44. SimpleEncodingHolder(char [] highChars) {
  45. this.highChars = highChars;
  46. }
  47. /**
  48. * @return The associated {@link Simple8BitZipEncoding}, which
  49. * is instantiated if not done so far.
  50. */
  51. public synchronized Simple8BitZipEncoding getEncoding() {
  52. if (this.encoding == null) {
  53. this.encoding = new Simple8BitZipEncoding(this.highChars);
  54. }
  55. return this.encoding;
  56. }
  57. }
  /**
   * Maps encoding names (String) to the {@link SimpleEncodingHolder} that
   * serves them. Several spellings of the same code page share one holder.
   */
  private static final Map simpleEncodings;

  static {
      // High-character table registered under the CP437 / IBM437 names.
      final char[] cp437HighChars = {
          0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
          0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
          0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
          0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
          0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
          0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
          0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
          0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
          0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
          0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
          0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
          0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
          0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
          0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
          0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
          0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
          0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
          0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
          0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
          0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
          0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
          0x25a0, 0x00a0
      };

      // High-character table registered under the CP850 / IBM850 names.
      final char[] cp850HighChars = {
          0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
          0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
          0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
          0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
          0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
          0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
          0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
          0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
          0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
          0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
          0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
          0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
          0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
          0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
          0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
          0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
          0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
          0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
          0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
          0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
          0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
          0x25a0, 0x00a0
      };

      final Map registry = new HashMap();

      // Every listed spelling resolves to the same holder instance.
      final SimpleEncodingHolder cp437Holder =
          new SimpleEncodingHolder(cp437HighChars);
      final String[] cp437Names = {
          "CP437", "Cp437", "cp437", "IBM437", "ibm437"
      };
      for (int i = 0; i < cp437Names.length; i++) {
          registry.put(cp437Names[i], cp437Holder);
      }

      final SimpleEncodingHolder cp850Holder =
          new SimpleEncodingHolder(cp850HighChars);
      final String[] cp850Names = {
          "CP850", "Cp850", "cp850", "IBM850", "ibm850"
      };
      for (int i = 0; i < cp850Names.length; i++) {
          registry.put(cp850Names[i], cp850Holder);
      }

      simpleEncodings = registry;
  }
  120. /**
  121. * Grow a byte buffer, so it has a minimal capacity or at least
  122. * the double capacity of the original buffer
  123. *
  124. * @param b The original buffer.
  125. * @param newCapacity The minimal requested new capacity.
  126. * @return A byte buffer <code>r</code> with
  127. * <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
  128. * all the data contained in <code>b</code> copied to the beginning
  129. * of <code>r</code>.
  130. *
  131. */
  132. static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
  133. b.limit(b.position());
  134. b.rewind();
  135. int c2 = b.capacity() * 2;
  136. ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);
  137. on.put(b);
  138. return on;
  139. }
  140. /**
  141. * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
  142. * ASCII bytes.
  143. */
  144. private static final byte[] HEX_DIGITS =
  145. new byte [] {
  146. 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
  147. 0x42, 0x43, 0x44, 0x45, 0x46
  148. };
  149. /**
  150. * Append <code>%Uxxxx</code> to the given byte buffer.
  151. * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
  152. *
  153. * @param bb The byte buffer to write to.
  154. * @param c The character to write.
  155. */
  156. static void appendSurrogate(ByteBuffer bb, char c) {
  157. bb.put((byte) '%');
  158. bb.put((byte) 'U');
  159. bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
  160. bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
  161. bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
  162. bb.put(HEX_DIGITS[c & 0x0f]);
  163. }
  164. /**
  165. * name of the encoding UTF-8
  166. */
  167. static final String UTF8 = "UTF8";
  168. /**
  169. * variant name of the encoding UTF-8 used for comparisions.
  170. */
  171. private static final String UTF_DASH_8 = "utf-8";
  172. /**
  173. * name of the encoding UTF-8
  174. */
  175. static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
  176. /**
  177. * Instantiates a zip encoding.
  178. *
  179. * @param name The name of the zip encoding. Specify <code>null</code> for
  180. * the platform's default encoding.
  181. * @return A zip encoding for the given encoding name.
  182. */
  183. static ZipEncoding getZipEncoding(String name) {
  184. // fallback encoding is good enough for utf-8.
  185. if (isUTF8(name)) {
  186. return UTF8_ZIP_ENCODING;
  187. }
  188. if (name == null) {
  189. return new FallbackZipEncoding();
  190. }
  191. SimpleEncodingHolder h =
  192. (SimpleEncodingHolder) simpleEncodings.get(name);
  193. if (h!=null) {
  194. return h.getEncoding();
  195. }
  196. try {
  197. Charset cs = Charset.forName(name);
  198. return new NioZipEncoding(cs);
  199. } catch (UnsupportedCharsetException e) {
  200. return new FallbackZipEncoding(name);
  201. }
  202. }
  203. /**
  204. * Whether a given encoding - or the platform's default encoding
  205. * if the parameter is null - is UTF-8.
  206. */
  207. static boolean isUTF8(String encoding) {
  208. if (encoding == null) {
  209. // check platform's default encoding
  210. encoding = System.getProperty("file.encoding");
  211. }
  212. return UTF8.equalsIgnoreCase(encoding)
  213. || UTF_DASH_8.equalsIgnoreCase(encoding);
  214. }
  215. }