/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.apache.commons.compress.archivers.zip;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;

/**
 * Implements an input stream that can read Zip archives.
 * <p>
 * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
 * is not available from the header.
 * <p>
 * The {@link ZipFile} class is preferred when reading from files.
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream {

    private static final int SHORT = 2;
    private static final int WORD = 4;

    /**
     * Mask applied to the general purpose bit flag to detect an entry
     * that is followed by a data descriptor.
     */
    private static final int DATA_DESCRIPTOR_FLAG = 8;

    /**
     * The zip encoding to use for filenames and the file comment.
     */
    private final ZipEncoding zipEncoding;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /** The wrapped stream; always a {@link PushbackInputStream} (see constructor). */
    private final InputStream in;

    private final Inflater inf = new Inflater(true);
    private final CRC32 crc = new CRC32();

    /** Read buffer shared by the STORED and DEFLATED code paths. */
    private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];

    private ZipArchiveEntry current = null;
    private boolean closed = false;
    private boolean hitCentralDirectory = false;
    /** Number of bytes of the current STORED entry handed to the caller. */
    private long readBytesOfEntry = 0;
    /** Position of the next unconsumed byte in {@link #buf} (STORED only). */
    private int offsetInBuffer = 0;
    /** Number of bytes pulled from {@link #in} for the current entry. */
    private int bytesReadFromStream = 0;
    /** Result of the most recent {@code in.read(buf)}; -1 at end of stream. */
    private int lengthOfLastRead = 0;
    private boolean hasDataDescriptor = false;

    private static final int LFH_LEN = 30;
    /*
      local file header signature     4 bytes  (0x04034b50)
      version needed to extract       2 bytes
      general purpose bit flag        2 bytes
      compression method              2 bytes
      last mod file time              2 bytes
      last mod file date              2 bytes
      crc-32                          4 bytes
      compressed size                 4 bytes
      uncompressed size               4 bytes
      file name length                2 bytes
      extra field length              2 bytes
    */

    /**
     * Creates a stream that decodes file names as UTF-8 and uses
     * Unicode extra fields.
     *
     * @param inputStream the stream to read the archive from
     */
    public ZipArchiveInputStream(InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8, true);
    }

    /**
     * @param inputStream the stream to read the archive from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(InputStream inputStream,
                                 String encoding,
                                 boolean useUnicodeExtraFields) {
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // The pushback capacity equals the read buffer size so that
        // closeEntry() can return over-read bytes of the last buffer.
        in = new PushbackInputStream(inputStream, buf.length);
    }

    /**
     * Closes the current entry (if any) and reads the next local file
     * header.
     *
     * @return the next entry, or {@code null} if the stream is closed,
     * the central directory has been reached, the stream ends while
     * reading the fixed-size header, or the next signature is not a
     * local file header signature
     * @throws IOException if reading from the underlying stream fails
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
        }
        byte[] lfh = new byte[LFH_LEN];
        try {
            readFully(lfh);
        } catch (EOFException e) {
            // no more data: treated as the regular end of the archive
            return null;
        }
        ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.CFH_SIG)) {
            hitCentralDirectory = true;
            return null;
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            return null;
        }

        int off = WORD;
        current = new ZipArchiveEntry();

        // NOTE(review): per the header layout documented above, the
        // field at this offset is "version needed to extract"; the
        // platform is derived from it - confirm this is intended.
        int versionMadeBy = ZipShort.getValue(lfh, off);
        off += SHORT;
        current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
                            & ZipFile.NIBLET_MASK);

        final int generalPurposeFlag = ZipShort.getValue(lfh, off);
        final boolean hasEFS =
            (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
        final ZipEncoding entryEncoding =
            hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        hasDataDescriptor = (generalPurposeFlag & DATA_DESCRIPTOR_FLAG) != 0;

        off += SHORT;

        current.setMethod(ZipShort.getValue(lfh, off));
        off += SHORT;

        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
        current.setTime(time);
        off += WORD;

        if (!hasDataDescriptor) {
            current.setCrc(ZipLong.getValue(lfh, off));
            off += WORD;

            current.setCompressedSize(ZipLong.getValue(lfh, off));
            off += WORD;

            current.setSize(ZipLong.getValue(lfh, off));
            off += WORD;
        } else {
            // crc and sizes are not taken from the header when a data
            // descriptor is announced; skip the three fields
            off += 3 * WORD;
        }

        int fileNameLen = ZipShort.getValue(lfh, off);

        off += SHORT;

        int extraLen = ZipShort.getValue(lfh, off);
        off += SHORT;

        byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.setName(entryEncoding.decode(fileName));

        byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.setExtra(extraData);

        if (!hasEFS && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
        }
        return current;
    }

    /**
     * Delegates to {@link #getNextZipEntry()}.
     *
     * @return the next entry or {@code null}
     * @throws IOException if reading from the underlying stream fails
     */
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Reads data of the current entry into {@code buffer}.
     *
     * @param buffer destination array
     * @param start offset in {@code buffer} at which to store data
     * @param length maximum number of bytes to read
     * @return the number of bytes stored, or -1 if there is no current
     * entry or the entry's data is exhausted
     * @throws IOException if the stream is closed, the underlying read
     * fails or a deflated entry is truncated
     * @throws ZipException if the deflated data is malformed
     * @throws ArrayIndexOutOfBoundsException if {@code start} and
     * {@code length} do not describe a range inside {@code buffer}
     */
    public int read(byte[] buffer, int start, int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (inf.finished() || current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (start > buffer.length || length < 0 || start < 0
            || buffer.length - start < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        if (current.getMethod() == ZipArchiveOutputStream.STORED) {
            return readStored(buffer, start, length);
        }
        return readDeflated(buffer, start, length);
    }

    /**
     * Copies bytes of a STORED entry from the internal buffer,
     * refilling it from the underlying stream when it is used up.
     */
    private int readStored(byte[] buffer, int start, int length)
        throws IOException {
        // Keep the size as a long: narrowing it to int would turn sizes
        // of 2 GiB and more into negative numbers and end the entry
        // before any byte is returned.
        final long csize = current.getSize();
        if (readBytesOfEntry >= csize) {
            return -1;
        }
        if (offsetInBuffer >= lengthOfLastRead) {
            offsetInBuffer = 0;
            if ((lengthOfLastRead = in.read(buf)) == -1) {
                return -1;
            }
            count(lengthOfLastRead);
            bytesReadFromStream += lengthOfLastRead;
        }
        // Never hand out more than what is left in the internal buffer,
        // otherwise bytes beyond the valid data would be copied.
        int toRead = Math.min(lengthOfLastRead - offsetInBuffer, length);
        final long remainingInEntry = csize - readBytesOfEntry;
        if (remainingInEntry < toRead) {
            // remainingInEntry is positive and smaller than an int here
            toRead = (int) remainingInEntry;
        }
        System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
        offsetInBuffer += toRead;
        readBytesOfEntry += toRead;
        crc.update(buffer, start, toRead);
        return toRead;
    }

    /**
     * Inflates bytes of a DEFLATED entry, feeding the inflater from the
     * underlying stream whenever it asks for more input.
     */
    private int readDeflated(byte[] buffer, int start, int length)
        throws IOException {
        if (inf.needsInput()) {
            fill();
            if (lengthOfLastRead > 0) {
                bytesReadFromStream += lengthOfLastRead;
            }
        }
        int read;
        try {
            read = inf.inflate(buffer, start, length);
        } catch (DataFormatException e) {
            // keep the exception type callers expect but do not lose
            // the original cause
            ZipException ze = new ZipException(e.getMessage());
            ze.initCause(e);
            throw ze;
        }
        if (read == 0) {
            if (inf.finished()) {
                return -1;
            } else if (lengthOfLastRead == -1) {
                throw new IOException("Truncated ZIP file");
            }
        }
        crc.update(buffer, start, read);
        return read;
    }

    /**
     * Closes this stream and the underlying stream; further calls have
     * no effect.
     *
     * @throws IOException if closing the underlying stream fails
     */
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            in.close();
        }
    }

    /**
     * Skips over entry data by reading and discarding it.
     *
     * @param value the number of bytes to skip
     * @return the number of bytes actually skipped, which is smaller
     * than {@code value} if the end of the entry is reached first
     * @throws IOException if reading fails
     * @throws IllegalArgumentException if {@code value} is negative
     */
    public long skip(long value) throws IOException {
        if (value < 0) {
            throw new IllegalArgumentException();
        }
        long skipped = 0;
        byte[] b = new byte[1024];
        while (skipped != value) {
            long rem = value - skipped;
            int x = read(b, 0, (int) Math.min(b.length, rem));
            if (x == -1) {
                return skipped;
            }
            skipped += x;
        }
        return skipped;
    }

    /*
     *  This test assumes that the zip file does not have any additional leading content,
     *  which is something that is allowed by the specification (e.g. self-extracting zips)
     */
    /**
     * Checks whether the given bytes start with a local file header
     * signature or an end of central directory signature.
     *
     * @param signature the bytes to check
     * @param length the number of valid bytes in {@code signature}
     * @return true if one of the two signatures matches
     */
    public static boolean matches(byte[] signature, int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
    }

    /**
     * Compares the leading bytes of {@code signature} with
     * {@code expected}.
     */
    private static boolean checksig(byte[] signature, byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Consumes the rest of the current entry, pushes back bytes that
     * were read from the stream but do not belong to the entry, skips
     * the data descriptor if one was announced and resets all
     * per-entry state.
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }
        // Ensure all entry bytes are read
        skip(Long.MAX_VALUE);
        int inB;
        if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            inB = inf.getTotalIn();
        } else {
            // Only the low 32 bits are needed: the subtraction below is
            // plain int arithmetic and its true result is the number of
            // unconsumed bytes of the last buffer.
            inB = (int) readBytesOfEntry;
        }

        // Pushback any required bytes
        int diff = bytesReadFromStream - inB;
        if (diff != 0) {
            ((PushbackInputStream) in).unread(buf,
                                              lengthOfLastRead - diff, diff);
        }

        if (hasDataDescriptor) {
            // read and discard 16 bytes following the entry data
            readFully(new byte[4 * WORD]);
        }

        inf.reset();
        readBytesOfEntry = 0;
        offsetInBuffer = 0;
        bytesReadFromStream = 0;
        lengthOfLastRead = 0;
        crc.reset();
        current = null;
    }

    /**
     * Reads the next chunk from the underlying stream into the buffer
     * and hands it to the inflater; at end of stream the inflater is
     * left without new input and {@code lengthOfLastRead} is -1.
     */
    private void fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if ((lengthOfLastRead = in.read(buf)) > 0) {
            inf.setInput(buf, 0, lengthOfLastRead);
        }
    }

    /**
     * Fills {@code b} completely from the underlying stream.
     *
     * @throws EOFException if the stream ends before {@code b} is full
     */
    private void readFully(byte[] b) throws IOException {
        int count = 0;
        while (count != b.length) {
            int x = in.read(b, count, b.length - count);
            if (x == -1) {
                throw new EOFException();
            }
            count += x;
        }
    }
}