You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

ZipFile.java 22 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package org.apache.tools.zip;
  19. import java.io.File;
  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import java.io.RandomAccessFile;
  23. import java.io.UnsupportedEncodingException;
  24. import java.util.Calendar;
  25. import java.util.Collections;
  26. import java.util.Date;
  27. import java.util.Enumeration;
  28. import java.util.HashMap;
  29. import java.util.Map;
  30. import java.util.zip.Inflater;
  31. import java.util.zip.InflaterInputStream;
  32. import java.util.zip.ZipException;
/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self-extracting
 * archives. Furthermore it returns instances of
 * <code>org.apache.tools.zip.ZipEntry</code> instead of
 * <code>java.util.zip.ZipEntry</code>.</p>
 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway. Like
 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:</p>
 *
 * <ul>
 *   <li>There is no getName method.</li>
 *   <li>entries has been renamed to getEntries.</li>
 *   <li>getEntries and getEntry return
 *   <code>org.apache.tools.zip.ZipEntry</code> instances.</li>
 *   <li>close is allowed to throw IOException.</li>
 * </ul>
 *
 */
  60. public class ZipFile {
  61. private static final int HASH_SIZE = 509;
  62. private static final int SHORT = 2;
  63. private static final int WORD = 4;
  64. private static final int NIBLET_MASK = 0x0f;
  65. private static final int BYTE_SHIFT = 8;
  66. private static final int POS_0 = 0;
  67. private static final int POS_1 = 1;
  68. private static final int POS_2 = 2;
  69. private static final int POS_3 = 3;
  70. /**
  71. * Maps ZipEntrys to Longs, recording the offsets of the local
  72. * file headers.
  73. */
  74. private final Map entries = new HashMap(HASH_SIZE);
  75. /**
  76. * Maps String to ZipEntrys, name -> actual entry.
  77. */
  78. private final Map nameMap = new HashMap(HASH_SIZE);
  79. private static final class OffsetEntry {
  80. private long headerOffset = -1;
  81. private long dataOffset = -1;
  82. }
  83. /**
  84. * The encoding to use for filenames and the file comment.
  85. *
  86. * <p>For a list of possible values see <a
  87. * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
  88. * Defaults to the platform's default character encoding.</p>
  89. */
  90. private String encoding = null;
  91. /**
  92. * The actual data source.
  93. */
  94. private RandomAccessFile archive;
  /**
   * Opens the given file for reading and decodes file names with the
   * platform's native encoding.
   *
   * @param f the archive.
   *
   * @throws IOException if an error occurs while reading the file.
   */
  public ZipFile(File f) throws IOException {
    // a null encoding means "use the platform default"
    this(f, (String) null);
  }
  /**
   * Opens the file of the given name for reading and decodes file
   * names with the platform's native encoding.
   *
   * @param name name of the archive.
   *
   * @throws IOException if an error occurs while reading the file.
   */
  public ZipFile(String name) throws IOException {
    // a null encoding means "use the platform default"
    this(new File(name), (String) null);
  }
  /**
   * Opens the file of the given name for reading and decodes file
   * names with the specified encoding.
   *
   * @param name name of the archive.
   * @param encoding the encoding to use for file names
   *
   * @throws IOException if an error occurs while reading the file.
   */
  public ZipFile(String name, String encoding) throws IOException {
    this(new File(name), encoding);
  }
  129. /**
  130. * Opens the given file for reading, assuming the specified
  131. * encoding for file names.
  132. *
  133. * @param f the archive.
  134. * @param encoding the encoding to use for file names
  135. *
  136. * @throws IOException if an error occurs while reading the file.
  137. */
  138. public ZipFile(File f, String encoding) throws IOException {
  139. this.encoding = encoding;
  140. archive = new RandomAccessFile(f, "r");
  141. boolean success = false;
  142. try {
  143. populateFromCentralDirectory();
  144. resolveLocalFileHeaderData();
  145. success = true;
  146. } finally {
  147. if (!success) {
  148. try {
  149. archive.close();
  150. } catch (IOException e2) {
  151. // swallow, throw the original exception instead
  152. }
  153. }
  154. }
  155. }
  156. /**
  157. * The encoding to use for filenames and the file comment.
  158. *
  159. * @return null if using the platform's default character encoding.
  160. */
  161. public String getEncoding() {
  162. return encoding;
  163. }
  164. /**
  165. * Closes the archive.
  166. * @throws IOException if an error occurs closing the archive.
  167. */
  168. public void close() throws IOException {
  169. archive.close();
  170. }
  171. /**
  172. * close a zipfile quietly; throw no io fault, do nothing
  173. * on a null parameter
  174. * @param zipfile file to close, can be null
  175. */
  176. public static void closeQuietly(ZipFile zipfile) {
  177. if (zipfile != null) {
  178. try {
  179. zipfile.close();
  180. } catch (IOException e) {
  181. //ignore
  182. }
  183. }
  184. }
  185. /**
  186. * Returns all entries.
  187. * @return all entries as {@link ZipEntry} instances
  188. */
  189. public Enumeration getEntries() {
  190. return Collections.enumeration(entries.keySet());
  191. }
  192. /**
  193. * Returns a named entry - or <code>null</code> if no entry by
  194. * that name exists.
  195. * @param name name of the entry.
  196. * @return the ZipEntry corresponding to the given name - or
  197. * <code>null</code> if not present.
  198. */
  199. public ZipEntry getEntry(String name) {
  200. return (ZipEntry) nameMap.get(name);
  201. }
  202. /**
  203. * Returns an InputStream for reading the contents of the given entry.
  204. * @param ze the entry to get the stream for.
  205. * @return a stream to read the entry from.
  206. * @throws IOException if unable to create an input stream from the zipenty
  207. * @throws ZipException if the zipentry has an unsupported compression method
  208. */
  209. public InputStream getInputStream(ZipEntry ze)
  210. throws IOException, ZipException {
  211. OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
  212. if (offsetEntry == null) {
  213. return null;
  214. }
  215. long start = offsetEntry.dataOffset;
  216. BoundedInputStream bis =
  217. new BoundedInputStream(start, ze.getCompressedSize());
  218. switch (ze.getMethod()) {
  219. case ZipEntry.STORED:
  220. return bis;
  221. case ZipEntry.DEFLATED:
  222. bis.addDummy();
  223. return new InflaterInputStream(bis, new Inflater(true));
  224. default:
  225. throw new ZipException("Found unsupported compression method "
  226. + ze.getMethod());
  227. }
  228. }
  229. private static final int CFH_LEN =
  230. /* version made by */ SHORT
  231. /* version needed to extract */ + SHORT
  232. /* general purpose bit flag */ + SHORT
  233. /* compression method */ + SHORT
  234. /* last mod file time */ + SHORT
  235. /* last mod file date */ + SHORT
  236. /* crc-32 */ + WORD
  237. /* compressed size */ + WORD
  238. /* uncompressed size */ + WORD
  239. /* filename length */ + SHORT
  240. /* extra field length */ + SHORT
  241. /* file comment length */ + SHORT
  242. /* disk number start */ + SHORT
  243. /* internal file attributes */ + SHORT
  244. /* external file attributes */ + WORD
  245. /* relative offset of local header */ + WORD;
  246. /**
  247. * Reads the central directory of the given archive and populates
  248. * the internal tables with ZipEntry instances.
  249. *
  250. * <p>The ZipEntrys will know all data that can be obtained from
  251. * the central directory alone, but not the data that requires the
  252. * local file header or additional data to be read.</p>
  253. */
  254. private void populateFromCentralDirectory()
  255. throws IOException {
  256. positionAtCentralDirectory();
  257. byte[] cfh = new byte[CFH_LEN];
  258. byte[] signatureBytes = new byte[WORD];
  259. archive.readFully(signatureBytes);
  260. long sig = ZipLong.getValue(signatureBytes);
  261. final long cfhSig = ZipLong.getValue(ZipOutputStream.CFH_SIG);
  262. if (sig != cfhSig && startsWithLocalFileHeader()) {
  263. throw new IOException("central directory is empty, can't expand"
  264. + " corrupt archive.");
  265. }
  266. while (sig == cfhSig) {
  267. archive.readFully(cfh);
  268. int off = 0;
  269. ZipEntry ze = new ZipEntry();
  270. int versionMadeBy = ZipShort.getValue(cfh, off);
  271. off += SHORT;
  272. ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
  273. off += SHORT; // skip version info
  274. final int generalPurposeFlag = ZipShort.getValue(cfh, off);
  275. final String entryEncoding =
  276. (generalPurposeFlag & ZipOutputStream.EFS_FLAG) != 0
  277. ? ZipOutputStream.UTF8
  278. : encoding;
  279. off += SHORT;
  280. ze.setMethod(ZipShort.getValue(cfh, off));
  281. off += SHORT;
  282. // FIXME this is actually not very cpu cycles friendly as we are converting from
  283. // dos to java while the underlying Sun implementation will convert
  284. // from java to dos time for internal storage...
  285. long time = dosToJavaTime(ZipLong.getValue(cfh, off));
  286. ze.setTime(time);
  287. off += WORD;
  288. ze.setCrc(ZipLong.getValue(cfh, off));
  289. off += WORD;
  290. ze.setCompressedSize(ZipLong.getValue(cfh, off));
  291. off += WORD;
  292. ze.setSize(ZipLong.getValue(cfh, off));
  293. off += WORD;
  294. int fileNameLen = ZipShort.getValue(cfh, off);
  295. off += SHORT;
  296. int extraLen = ZipShort.getValue(cfh, off);
  297. off += SHORT;
  298. int commentLen = ZipShort.getValue(cfh, off);
  299. off += SHORT;
  300. off += SHORT; // disk number
  301. ze.setInternalAttributes(ZipShort.getValue(cfh, off));
  302. off += SHORT;
  303. ze.setExternalAttributes(ZipLong.getValue(cfh, off));
  304. off += WORD;
  305. byte[] fileName = new byte[fileNameLen];
  306. archive.readFully(fileName);
  307. ze.setName(getString(fileName, entryEncoding));
  308. // LFH offset,
  309. OffsetEntry offset = new OffsetEntry();
  310. offset.headerOffset = ZipLong.getValue(cfh, off);
  311. // data offset will be filled later
  312. entries.put(ze, offset);
  313. nameMap.put(ze.getName(), ze);
  314. int lenToSkip = extraLen;
  315. while (lenToSkip > 0) {
  316. int skipped = archive.skipBytes(lenToSkip);
  317. if (skipped <= 0) {
  318. throw new RuntimeException("failed to skip extra data in"
  319. + " central directory");
  320. }
  321. lenToSkip -= skipped;
  322. }
  323. byte[] comment = new byte[commentLen];
  324. archive.readFully(comment);
  325. ze.setComment(getString(comment, entryEncoding));
  326. archive.readFully(signatureBytes);
  327. sig = ZipLong.getValue(signatureBytes);
  328. }
  329. }
  330. private static final int MIN_EOCD_SIZE =
  331. /* end of central dir signature */ WORD
  332. /* number of this disk */ + SHORT
  333. /* number of the disk with the */
  334. /* start of the central directory */ + SHORT
  335. /* total number of entries in */
  336. /* the central dir on this disk */ + SHORT
  337. /* total number of entries in */
  338. /* the central dir */ + SHORT
  339. /* size of the central directory */ + WORD
  340. /* offset of start of central */
  341. /* directory with respect to */
  342. /* the starting disk number */ + WORD
  343. /* zipfile comment length */ + SHORT;
  344. private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
  345. /* maximum length of zipfile comment */ + 0xFFFF;
  346. private static final int CFD_LOCATOR_OFFSET =
  347. /* end of central dir signature */ WORD
  348. /* number of this disk */ + SHORT
  349. /* number of the disk with the */
  350. /* start of the central directory */ + SHORT
  351. /* total number of entries in */
  352. /* the central dir on this disk */ + SHORT
  353. /* total number of entries in */
  354. /* the central dir */ + SHORT
  355. /* size of the central directory */ + WORD;
  356. /**
  357. * Searches for the &quot;End of central dir record&quot;, parses
  358. * it and positions the stream at the first central directory
  359. * record.
  360. */
  361. private void positionAtCentralDirectory()
  362. throws IOException {
  363. boolean found = false;
  364. long off = archive.length() - MIN_EOCD_SIZE;
  365. long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE);
  366. if (off >= 0) {
  367. archive.seek(off);
  368. byte[] sig = ZipOutputStream.EOCD_SIG;
  369. int curr = archive.read();
  370. while (off >= stopSearching && curr != -1) {
  371. if (curr == sig[POS_0]) {
  372. curr = archive.read();
  373. if (curr == sig[POS_1]) {
  374. curr = archive.read();
  375. if (curr == sig[POS_2]) {
  376. curr = archive.read();
  377. if (curr == sig[POS_3]) {
  378. found = true;
  379. break;
  380. }
  381. }
  382. }
  383. }
  384. archive.seek(--off);
  385. curr = archive.read();
  386. }
  387. }
  388. if (!found) {
  389. throw new ZipException("archive is not a ZIP archive");
  390. }
  391. archive.seek(off + CFD_LOCATOR_OFFSET);
  392. byte[] cfdOffset = new byte[WORD];
  393. archive.readFully(cfdOffset);
  394. archive.seek(ZipLong.getValue(cfdOffset));
  395. }
  396. /**
  397. * Number of bytes in local file header up to the &quot;length of
  398. * filename&quot; entry.
  399. */
  400. private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
  401. /* local file header signature */ WORD
  402. /* version needed to extract */ + SHORT
  403. /* general purpose bit flag */ + SHORT
  404. /* compression method */ + SHORT
  405. /* last mod file time */ + SHORT
  406. /* last mod file date */ + SHORT
  407. /* crc-32 */ + WORD
  408. /* compressed size */ + WORD
  409. /* uncompressed size */ + WORD;
  410. /**
  411. * Walks through all recorded entries and adds the data available
  412. * from the local file header.
  413. *
  414. * <p>Also records the offsets for the data to read from the
  415. * entries.</p>
  416. */
  417. private void resolveLocalFileHeaderData()
  418. throws IOException {
  419. Enumeration e = getEntries();
  420. while (e.hasMoreElements()) {
  421. ZipEntry ze = (ZipEntry) e.nextElement();
  422. OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
  423. long offset = offsetEntry.headerOffset;
  424. archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
  425. byte[] b = new byte[SHORT];
  426. archive.readFully(b);
  427. int fileNameLen = ZipShort.getValue(b);
  428. archive.readFully(b);
  429. int extraFieldLen = ZipShort.getValue(b);
  430. int lenToSkip = fileNameLen;
  431. while (lenToSkip > 0) {
  432. int skipped = archive.skipBytes(lenToSkip);
  433. if (skipped <= 0) {
  434. throw new RuntimeException("failed to skip file name in"
  435. + " local file header");
  436. }
  437. lenToSkip -= skipped;
  438. }
  439. byte[] localExtraData = new byte[extraFieldLen];
  440. archive.readFully(localExtraData);
  441. ze.setExtra(localExtraData);
  442. /*dataOffsets.put(ze,
  443. new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
  444. + SHORT + SHORT + fileNameLen + extraFieldLen));
  445. */
  446. offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
  447. + SHORT + SHORT + fileNameLen + extraFieldLen;
  448. }
  449. }
  450. /**
  451. * Convert a DOS date/time field to a Date object.
  452. *
  453. * @param zipDosTime contains the stored DOS time.
  454. * @return a Date instance corresponding to the given time.
  455. */
  456. protected static Date fromDosTime(ZipLong zipDosTime) {
  457. long dosTime = zipDosTime.getValue();
  458. return new Date(dosToJavaTime(dosTime));
  459. }
  460. /*
  461. * Converts DOS time to Java time (number of milliseconds since epoch).
  462. */
  463. private static long dosToJavaTime(long dosTime) {
  464. Calendar cal = Calendar.getInstance();
  465. // CheckStyle:MagicNumberCheck OFF - no point
  466. cal.set(Calendar.YEAR, (int) ((dosTime >> 25) & 0x7f) + 1980);
  467. cal.set(Calendar.MONTH, (int) ((dosTime >> 21) & 0x0f) - 1);
  468. cal.set(Calendar.DATE, (int) (dosTime >> 16) & 0x1f);
  469. cal.set(Calendar.HOUR_OF_DAY, (int) (dosTime >> 11) & 0x1f);
  470. cal.set(Calendar.MINUTE, (int) (dosTime >> 5) & 0x3f);
  471. cal.set(Calendar.SECOND, (int) (dosTime << 1) & 0x3e);
  472. // CheckStyle:MagicNumberCheck ON
  473. return cal.getTime().getTime();
  474. }
  475. /**
  476. * Retrieve a String from the given bytes using the encoding set
  477. * for this ZipFile.
  478. *
  479. * @param bytes the byte array to transform
  480. * @return String obtained by using the given encoding
  481. * @throws ZipException if the encoding cannot be recognized.
  482. */
  483. protected String getString(byte[] bytes) throws ZipException {
  484. return getString(bytes, encoding);
  485. }
  486. /**
  487. * Retrieve a String from the given bytes using the encoding set
  488. * for this ZipFile.
  489. *
  490. * @param bytes the byte array to transform
  491. * @return String obtained by using the given encoding
  492. * @throws ZipException if the encoding cannot be recognized.
  493. */
  494. protected String getString(byte[] bytes, String enc)
  495. throws ZipException {
  496. if (enc == null) {
  497. return new String(bytes);
  498. } else {
  499. try {
  500. return new String(bytes, enc);
  501. } catch (UnsupportedEncodingException uee) {
  502. throw new ZipException(uee.getMessage());
  503. }
  504. }
  505. }
  506. /**
  507. * Checks whether the archive starts with a LFH. If it doesn't,
  508. * it may be an empty archive.
  509. */
  510. private boolean startsWithLocalFileHeader() throws IOException {
  511. archive.seek(0);
  512. final byte[] start = new byte[WORD];
  513. archive.readFully(start);
  514. for (int i = 0; i < start.length; i++) {
  515. if (start[i] != ZipOutputStream.LFH_SIG[i]) {
  516. return false;
  517. }
  518. }
  519. return true;
  520. }
  521. /**
  522. * InputStream that delegates requests to the underlying
  523. * RandomAccessFile, making sure that only bytes from a certain
  524. * range can be read.
  525. */
  526. private class BoundedInputStream extends InputStream {
  527. private long remaining;
  528. private long loc;
  529. private boolean addDummyByte = false;
  530. BoundedInputStream(long start, long remaining) {
  531. this.remaining = remaining;
  532. loc = start;
  533. }
  534. public int read() throws IOException {
  535. if (remaining-- <= 0) {
  536. if (addDummyByte) {
  537. addDummyByte = false;
  538. return 0;
  539. }
  540. return -1;
  541. }
  542. synchronized (archive) {
  543. archive.seek(loc++);
  544. return archive.read();
  545. }
  546. }
  547. public int read(byte[] b, int off, int len) throws IOException {
  548. if (remaining <= 0) {
  549. if (addDummyByte) {
  550. addDummyByte = false;
  551. b[off] = 0;
  552. return 1;
  553. }
  554. return -1;
  555. }
  556. if (len <= 0) {
  557. return 0;
  558. }
  559. if (len > remaining) {
  560. len = (int) remaining;
  561. }
  562. int ret = -1;
  563. synchronized (archive) {
  564. archive.seek(loc);
  565. ret = archive.read(b, off, len);
  566. }
  567. if (ret > 0) {
  568. loc += ret;
  569. remaining -= ret;
  570. }
  571. return ret;
  572. }
  573. /**
  574. * Inflater needs an extra dummy byte for nowrap - see
  575. * Inflater's javadocs.
  576. */
  577. void addDummy() {
  578. addDummyByte = true;
  579. }
  580. }
  581. }