You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

TarInputStream.java 22 kB

11 years ago
11 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. /*
  19. * This package is based on the work done by Timothy Gerard Endres
  20. * (time@ice.com) to whom the Ant project is very grateful for his great code.
  21. */
  22. package org.apache.tools.tar;
  23. import java.io.ByteArrayOutputStream;
  24. import java.io.FilterInputStream;
  25. import java.io.IOException;
  26. import java.io.InputStream;
  27. import java.io.OutputStream;
  28. import java.util.HashMap;
  29. import java.util.Map;
  30. import java.util.Map.Entry;
  31. import org.apache.tools.zip.ZipEncoding;
  32. import org.apache.tools.zip.ZipEncodingHelper;
  33. /**
  34. * The TarInputStream reads a UNIX tar archive as an InputStream.
  35. * Methods are provided to position at each successive entry in
  36. * the archive, and then read each entry as a normal input stream
  37. * using read().
  38. *
  39. */
  40. public class TarInputStream extends FilterInputStream {
  41. private static final int SMALL_BUFFER_SIZE = 256;
  42. private static final int BUFFER_SIZE = 8 * 1024;
  43. private static final int LARGE_BUFFER_SIZE = 32 * 1024;
  44. private static final int BYTE_MASK = 0xFF;
  45. private final byte[] SKIP_BUF = new byte[BUFFER_SIZE];
  46. private final byte[] SMALL_BUF = new byte[SMALL_BUFFER_SIZE];
  47. // CheckStyle:VisibilityModifier OFF - bc
  48. protected boolean debug;
  49. protected boolean hasHitEOF;
  50. protected long entrySize;
  51. protected long entryOffset;
  52. protected byte[] readBuf;
  53. protected TarBuffer buffer;
  54. protected TarEntry currEntry;
  55. /**
  56. * This contents of this array is not used at all in this class,
  57. * it is only here to avoid repeated object creation during calls
  58. * to the no-arg read method.
  59. */
  60. protected byte[] oneBuf;
  61. // CheckStyle:VisibilityModifier ON
  62. private final ZipEncoding encoding;
  /**
   * Creates a tar input stream using {@code TarBuffer.DEFAULT_BLKSIZE} and
   * {@code TarBuffer.DEFAULT_RCDSIZE} and no explicit file name encoding.
   *
   * @param is the input stream to read the archive from
   */
  public TarInputStream(InputStream is) {
    this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
  }
  /**
   * Creates a tar input stream using {@code TarBuffer.DEFAULT_BLKSIZE} and
   * {@code TarBuffer.DEFAULT_RCDSIZE} with the given file name encoding.
   *
   * @param is the input stream to read the archive from
   * @param encoding name of the encoding to use for file names
   */
  public TarInputStream(InputStream is, String encoding) {
    this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
  }
  /**
   * Creates a tar input stream with a caller-chosen block size and
   * {@code TarBuffer.DEFAULT_RCDSIZE} as the record size.
   *
   * @param is the input stream to read the archive from
   * @param blockSize the block size to use
   */
  public TarInputStream(InputStream is, int blockSize) {
    this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
  }
  /**
   * Creates a tar input stream with a caller-chosen block size,
   * {@code TarBuffer.DEFAULT_RCDSIZE} as the record size, and the given
   * file name encoding.
   *
   * @param is the input stream to read the archive from
   * @param blockSize the block size to use
   * @param encoding name of the encoding to use for file names
   */
  public TarInputStream(InputStream is, int blockSize, String encoding) {
    this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
  }
  /**
   * Creates a tar input stream with caller-chosen block and record sizes.
   * A {@code null} encoding name is passed on to the four-argument constructor.
   *
   * @param is the input stream to read the archive from
   * @param blockSize the block size to use
   * @param recordSize the record size to use
   */
  public TarInputStream(InputStream is, int blockSize, int recordSize) {
    this(is, blockSize, recordSize, null);
  }
  /**
   * Creates a tar input stream; all other constructors end up here.
   *
   * <p>The stream is wrapped in a new {@code TarBuffer} built from the given
   * block and record sizes, and the encoding name is resolved through
   * {@code ZipEncodingHelper.getZipEncoding}.
   *
   * @param is the input stream to read the archive from
   * @param blockSize the block size to use
   * @param recordSize the record size to use
   * @param encoding name of the encoding to use for file names; the
   *     shorter constructors pass {@code null} here (presumably selecting a
   *     default encoding - confirm against ZipEncodingHelper)
   */
  public TarInputStream(
      InputStream is, int blockSize, int recordSize, String encoding) {
    super(is);
    buffer = new TarBuffer(is, blockSize, recordSize);
    readBuf = null;
    oneBuf = new byte[1];
    debug = false;
    hasHitEOF = false;
    this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
  }
  121. /**
  122. * Sets the debugging flag.
  123. *
  124. * @param debug True to turn on debugging.
  125. */
  126. public void setDebug(boolean debug) {
  127. this.debug = debug;
  128. buffer.setDebug(debug);
  129. }
  130. /**
  131. * Closes this stream. Calls the TarBuffer's close() method.
  132. * @throws IOException on error
  133. */
  134. @Override
  135. public void close() throws IOException {
  136. buffer.close();
  137. }
  138. /**
  139. * Get the record size being used by this stream's TarBuffer.
  140. *
  141. * @return The TarBuffer record size.
  142. */
  143. public int getRecordSize() {
  144. return buffer.getRecordSize();
  145. }
  146. /**
  147. * Get the available data that can be read from the current
  148. * entry in the archive. This does not indicate how much data
  149. * is left in the entire archive, only in the current entry.
  150. * This value is determined from the entry's size header field
  151. * and the amount of data already read from the current entry.
  152. * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
  153. * bytes are left in the current entry in the archive.
  154. *
  155. * @return The number of available bytes for the current entry.
  156. * @throws IOException for signature
  157. */
  158. @Override
  159. public int available() throws IOException {
  160. if (isDirectory()) {
  161. return 0;
  162. }
  163. if (entrySize - entryOffset > Integer.MAX_VALUE) {
  164. return Integer.MAX_VALUE;
  165. }
  166. return (int) (entrySize - entryOffset);
  167. }
  168. /**
  169. * Skip bytes in the input buffer. This skips bytes in the
  170. * current entry's data, not the entire archive, and will
  171. * stop at the end of the current entry's data if the number
  172. * to skip extends beyond that point.
  173. *
  174. * @param numToSkip The number of bytes to skip.
  175. * @return the number actually skipped
  176. * @throws IOException on error
  177. */
  178. @Override
  179. public long skip(long numToSkip) throws IOException {
  180. if (numToSkip <= 0 || isDirectory()) {
  181. return 0;
  182. }
  183. // REVIEW
  184. // This is horribly inefficient, but it ensures that we
  185. // properly skip over bytes via the TarBuffer...
  186. //
  187. long skip = numToSkip;
  188. while (skip > 0) {
  189. int realSkip = (int) (skip > SKIP_BUF.length
  190. ? SKIP_BUF.length : skip);
  191. int numRead = read(SKIP_BUF, 0, realSkip);
  192. if (numRead == -1) {
  193. break;
  194. }
  195. skip -= numRead;
  196. }
  197. return (numToSkip - skip);
  198. }
  199. /**
  200. * Since we do not support marking just yet, we return false.
  201. *
  202. * @return False.
  203. */
  204. @Override
  205. public boolean markSupported() {
  206. return false;
  207. }
  208. /**
  209. * Since we do not support marking just yet, we do nothing.
  210. *
  211. * @param markLimit The limit to mark.
  212. */
  213. @Override
  214. public void mark(int markLimit) {
  215. }
  216. /**
  217. * Since we do not support marking just yet, we do nothing.
  218. */
  219. @Override
  220. public void reset() {
  221. }
  222. /**
  223. * Get the next entry in this tar archive. This will skip
  224. * over any remaining data in the current entry, if there
  225. * is one, and place the input stream at the header of the
  226. * next entry, and read the header and instantiate a new
  227. * TarEntry from the header bytes and return that entry.
  228. * If there are no more entries in the archive, null will
  229. * be returned to indicate that the end of the archive has
  230. * been reached.
  231. *
  232. * @return The next TarEntry in the archive, or null.
  233. * @throws IOException on error
  234. */
  235. public TarEntry getNextEntry() throws IOException {
  236. if (hasHitEOF) {
  237. return null;
  238. }
  239. if (currEntry != null) {
  240. long numToSkip = entrySize - entryOffset;
  241. if (debug) {
  242. System.err.println("TarInputStream: SKIP currENTRY '"
  243. + currEntry.getName() + "' SZ "
  244. + entrySize + " OFF "
  245. + entryOffset + " skipping "
  246. + numToSkip + " bytes");
  247. }
  248. while (numToSkip > 0) {
  249. long skipped = skip(numToSkip);
  250. if (skipped <= 0) {
  251. throw new IOException("failed to skip current tar"
  252. + " entry");
  253. }
  254. numToSkip -= skipped;
  255. }
  256. readBuf = null;
  257. }
  258. byte[] headerBuf = getRecord();
  259. if (hasHitEOF) {
  260. currEntry = null;
  261. return null;
  262. }
  263. try {
  264. currEntry = new TarEntry(headerBuf, encoding);
  265. } catch (IllegalArgumentException e) {
  266. IOException ioe = new IOException("Error detected parsing the header");
  267. ioe.initCause(e);
  268. throw ioe;
  269. }
  270. if (debug) {
  271. System.err.println("TarInputStream: SET CURRENTRY '"
  272. + currEntry.getName()
  273. + "' size = "
  274. + currEntry.getSize());
  275. }
  276. entryOffset = 0;
  277. entrySize = currEntry.getSize();
  278. if (currEntry.isGNULongLinkEntry()) {
  279. byte[] longLinkData = getLongNameData();
  280. if (longLinkData == null) {
  281. // Bugzilla: 40334
  282. // Malformed tar file - long link entry name not followed by
  283. // entry
  284. return null;
  285. }
  286. currEntry.setLinkName(encoding.decode(longLinkData));
  287. }
  288. if (currEntry.isGNULongNameEntry()) {
  289. byte[] longNameData = getLongNameData();
  290. if (longNameData == null) {
  291. // Bugzilla: 40334
  292. // Malformed tar file - long entry name not followed by
  293. // entry
  294. return null;
  295. }
  296. currEntry.setName(encoding.decode(longNameData));
  297. }
  298. if (currEntry.isPaxHeader()) { // Process Pax headers
  299. paxHeaders();
  300. }
  301. if (currEntry.isGNUSparse()) { // Process sparse files
  302. readGNUSparse();
  303. }
  304. // If the size of the next element in the archive has changed
  305. // due to a new size being reported in the posix header
  306. // information, we update entrySize here so that it contains
  307. // the correct value.
  308. entrySize = currEntry.getSize();
  309. return currEntry;
  310. }
  311. /**
  312. * Get the next entry in this tar archive as longname data.
  313. *
  314. * @return The next entry in the archive as longname data, or null.
  315. * @throws IOException on error
  316. */
  317. protected byte[] getLongNameData() throws IOException {
  318. // read in the name
  319. ByteArrayOutputStream longName = new ByteArrayOutputStream();
  320. int length = 0;
  321. while ((length = read(SMALL_BUF)) >= 0) {
  322. longName.write(SMALL_BUF, 0, length);
  323. }
  324. getNextEntry();
  325. if (currEntry == null) {
  326. // Bugzilla: 40334
  327. // Malformed tar file - long entry name not followed by entry
  328. return null;
  329. }
  330. byte[] longNameData = longName.toByteArray();
  331. // remove trailing null terminator(s)
  332. length = longNameData.length;
  333. while (length > 0 && longNameData[length - 1] == 0) {
  334. --length;
  335. }
  336. if (length != longNameData.length) {
  337. byte[] l = new byte[length];
  338. System.arraycopy(longNameData, 0, l, 0, length);
  339. longNameData = l;
  340. }
  341. return longNameData;
  342. }
  343. /**
  344. * Get the next record in this tar archive. This will skip
  345. * over any remaining data in the current entry, if there
  346. * is one, and place the input stream at the header of the
  347. * next entry.
  348. * If there are no more entries in the archive, null will
  349. * be returned to indicate that the end of the archive has
  350. * been reached.
  351. *
  352. * @return The next header in the archive, or null.
  353. * @throws IOException on error
  354. */
  355. private byte[] getRecord() throws IOException {
  356. if (hasHitEOF) {
  357. return null;
  358. }
  359. byte[] headerBuf = buffer.readRecord();
  360. if (headerBuf == null) {
  361. if (debug) {
  362. System.err.println("READ NULL RECORD");
  363. }
  364. hasHitEOF = true;
  365. } else if (buffer.isEOFRecord(headerBuf)) {
  366. if (debug) {
  367. System.err.println("READ EOF RECORD");
  368. }
  369. hasHitEOF = true;
  370. }
  371. return hasHitEOF ? null : headerBuf;
  372. }
  373. private void paxHeaders() throws IOException {
  374. Map<String, String> headers = parsePaxHeaders(this);
  375. getNextEntry(); // Get the actual file entry
  376. applyPaxHeadersToCurrentEntry(headers);
  377. }
  378. Map<String, String> parsePaxHeaders(InputStream i) throws IOException {
  379. Map<String, String> headers = new HashMap<String, String>();
  380. // Format is "length keyword=value\n";
  381. while (true) { // get length
  382. int ch;
  383. int len = 0;
  384. int read = 0;
  385. while ((ch = i.read()) != -1) {
  386. read++;
  387. if (ch == ' ') { // End of length string
  388. // Get keyword
  389. ByteArrayOutputStream coll = new ByteArrayOutputStream();
  390. while ((ch = i.read()) != -1) {
  391. read++;
  392. if (ch == '=') { // end of keyword
  393. String keyword = coll.toString("UTF-8");
  394. // Get rest of entry
  395. final int restLen = len - read;
  396. ByteArrayOutputStream bos = new ByteArrayOutputStream();
  397. int got = 0;
  398. while (got < restLen && (ch = i.read()) != -1) {
  399. bos.write((byte) ch);
  400. got++;
  401. }
  402. bos.close();
  403. if (got != restLen) {
  404. throw new IOException("Failed to read "
  405. + "Paxheader. Expected "
  406. + restLen
  407. + " bytes, read "
  408. + got);
  409. }
  410. byte[] rest = bos.toByteArray();
  411. // Drop trailing NL
  412. String value = new String(rest, 0,
  413. restLen - 1, "UTF-8");
  414. headers.put(keyword, value);
  415. break;
  416. }
  417. coll.write((byte) ch);
  418. }
  419. break; // Processed single header
  420. }
  421. len *= 10;
  422. len += ch - '0';
  423. }
  424. if (ch == -1) { // EOF
  425. break;
  426. }
  427. }
  428. return headers;
  429. }
  430. private void applyPaxHeadersToCurrentEntry(Map<String, String> headers) {
  431. /*
  432. * The following headers are defined for Pax.
  433. * atime, ctime, charset: cannot use these without changing TarEntry fields
  434. * mtime
  435. * comment
  436. * gid, gname
  437. * linkpath
  438. * size
  439. * uid,uname
  440. * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters for those
  441. */
  442. for (Entry<String, String> ent : headers.entrySet()) {
  443. String key = ent.getKey();
  444. String val = ent.getValue();
  445. if ("path".equals(key)) {
  446. currEntry.setName(val);
  447. } else if ("linkpath".equals(key)) {
  448. currEntry.setLinkName(val);
  449. } else if ("gid".equals(key)) {
  450. currEntry.setGroupId(Long.parseLong(val));
  451. } else if ("gname".equals(key)) {
  452. currEntry.setGroupName(val);
  453. } else if ("uid".equals(key)) {
  454. currEntry.setUserId(Long.parseLong(val));
  455. } else if ("uname".equals(key)) {
  456. currEntry.setUserName(val);
  457. } else if ("size".equals(key)) {
  458. currEntry.setSize(Long.parseLong(val));
  459. } else if ("mtime".equals(key)) {
  460. currEntry.setModTime((long) (Double.parseDouble(val) * 1000));
  461. } else if ("SCHILY.devminor".equals(key)) {
  462. currEntry.setDevMinor(Integer.parseInt(val));
  463. } else if ("SCHILY.devmajor".equals(key)) {
  464. currEntry.setDevMajor(Integer.parseInt(val));
  465. }
  466. }
  467. }
  468. /**
  469. * Adds the sparse chunks from the current entry to the sparse chunks,
  470. * including any additional sparse entries following the current entry.
  471. *
  472. * @throws IOException on error
  473. *
  474. * @todo Sparse files get not yet really processed.
  475. */
  476. private void readGNUSparse() throws IOException {
  477. /* we do not really process sparse files yet
  478. sparses = new ArrayList();
  479. sparses.addAll(currEntry.getSparses());
  480. */
  481. if (currEntry.isExtended()) {
  482. TarArchiveSparseEntry entry;
  483. do {
  484. byte[] headerBuf = getRecord();
  485. if (hasHitEOF) {
  486. currEntry = null;
  487. break;
  488. }
  489. entry = new TarArchiveSparseEntry(headerBuf);
  490. /* we do not really process sparse files yet
  491. sparses.addAll(entry.getSparses());
  492. */
  493. } while (entry.isExtended());
  494. }
  495. }
  496. /**
  497. * Reads a byte from the current tar archive entry.
  498. *
  499. * This method simply calls read(byte[], int, int).
  500. *
  501. * @return The byte read, or -1 at EOF.
  502. * @throws IOException on error
  503. */
  504. @Override
  505. public int read() throws IOException {
  506. int num = read(oneBuf, 0, 1);
  507. return num == -1 ? -1 : (oneBuf[0]) & BYTE_MASK;
  508. }
  509. /**
  510. * Reads bytes from the current tar archive entry.
  511. *
  512. * This method is aware of the boundaries of the current
  513. * entry in the archive and will deal with them as if they
  514. * were this stream's start and EOF.
  515. *
  516. * @param buf The buffer into which to place bytes read.
  517. * @param offset The offset at which to place bytes read.
  518. * @param numToRead The number of bytes to read.
  519. * @return The number of bytes read, or -1 at EOF.
  520. * @throws IOException on error
  521. */
  522. @Override
  523. public int read(byte[] buf, int offset, int numToRead) throws IOException {
  524. int totalRead = 0;
  525. if (entryOffset >= entrySize || isDirectory()) {
  526. return -1;
  527. }
  528. if ((numToRead + entryOffset) > entrySize) {
  529. numToRead = (int) (entrySize - entryOffset);
  530. }
  531. if (readBuf != null) {
  532. int sz = (numToRead > readBuf.length) ? readBuf.length
  533. : numToRead;
  534. System.arraycopy(readBuf, 0, buf, offset, sz);
  535. if (sz >= readBuf.length) {
  536. readBuf = null;
  537. } else {
  538. int newLen = readBuf.length - sz;
  539. byte[] newBuf = new byte[newLen];
  540. System.arraycopy(readBuf, sz, newBuf, 0, newLen);
  541. readBuf = newBuf;
  542. }
  543. totalRead += sz;
  544. numToRead -= sz;
  545. offset += sz;
  546. }
  547. while (numToRead > 0) {
  548. byte[] rec = buffer.readRecord();
  549. if (rec == null) {
  550. // Unexpected EOF!
  551. throw new IOException("unexpected EOF with " + numToRead
  552. + " bytes unread");
  553. }
  554. int sz = numToRead;
  555. int recLen = rec.length;
  556. if (recLen > sz) {
  557. System.arraycopy(rec, 0, buf, offset, sz);
  558. readBuf = new byte[recLen - sz];
  559. System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
  560. } else {
  561. sz = recLen;
  562. System.arraycopy(rec, 0, buf, offset, recLen);
  563. }
  564. totalRead += sz;
  565. numToRead -= sz;
  566. offset += sz;
  567. }
  568. entryOffset += totalRead;
  569. return totalRead;
  570. }
  571. /**
  572. * Copies the contents of the current tar archive entry directly into
  573. * an output stream.
  574. *
  575. * @param out The OutputStream into which to write the entry's data.
  576. * @throws IOException on error
  577. */
  578. public void copyEntryContents(OutputStream out) throws IOException {
  579. byte[] buf = new byte[LARGE_BUFFER_SIZE];
  580. while (true) {
  581. int numRead = read(buf, 0, buf.length);
  582. if (numRead == -1) {
  583. break;
  584. }
  585. out.write(buf, 0, numRead);
  586. }
  587. }
  588. /**
  589. * Whether this class is able to read the given entry.
  590. *
  591. * <p>May return false if the current entry is a sparse file.</p>
  592. *
  593. * @param te TarEntry
  594. * @return boolean
  595. */
  596. public boolean canReadEntryData(TarEntry te) {
  597. return !te.isGNUSparse();
  598. }
  599. private boolean isDirectory() {
  600. return currEntry != null && currEntry.isDirectory();
  601. }
  602. }