001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.zip;
020    
021    import java.io.EOFException;
022    import java.io.IOException;
023    import java.io.InputStream;
024    import java.io.PushbackInputStream;
025    import java.util.zip.CRC32;
026    import java.util.zip.DataFormatException;
027    import java.util.zip.Inflater;
028    import java.util.zip.ZipException;
029    
030    import org.apache.commons.compress.archivers.ArchiveEntry;
031    import org.apache.commons.compress.archivers.ArchiveInputStream;
032    
033    /**
034     * Implements an input stream that can read Zip archives.
035     * <p>
036     * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
037     * is not available from the header.
038     * <p>
039     * The {@link ZipFile} class is preferred when reading from files.
040     *  
041     * @see ZipFile
042     * @NotThreadSafe
043     */
044    public class ZipArchiveInputStream extends ArchiveInputStream {
045    
046        private static final int SHORT = 2;
047        private static final int WORD = 4;
048    
049        /**
050         * The zip encoding to use for filenames and the file comment.
051         */
052        private final ZipEncoding zipEncoding;
053    
054        /**
055         * Whether to look for and use Unicode extra fields.
056         */
057        private final boolean useUnicodeExtraFields;
058    
059        private final InputStream in;
060    
061        private final Inflater inf = new Inflater(true);
062        private final CRC32 crc = new CRC32();
063    
064        private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
065    
066        private ZipArchiveEntry current = null;
067        private boolean closed = false;
068        private boolean hitCentralDirectory = false;
069        private int readBytesOfEntry = 0, offsetInBuffer = 0;
070        private int bytesReadFromStream = 0;
071        private int lengthOfLastRead = 0;
072        private boolean hasDataDescriptor = false;
073    
074        private static final int LFH_LEN = 30;
075        /*
076          local file header signature     4 bytes  (0x04034b50)
077          version needed to extract       2 bytes
078          general purpose bit flag        2 bytes
079          compression method              2 bytes
080          last mod file time              2 bytes
081          last mod file date              2 bytes
082          crc-32                          4 bytes
083          compressed size                 4 bytes
084          uncompressed size               4 bytes
085          file name length                2 bytes
086          extra field length              2 bytes
087        */
088    
089        public ZipArchiveInputStream(InputStream inputStream) {
090            this(inputStream, ZipEncodingHelper.UTF8, true);
091        }
092    
093        /**
094         * @param encoding the encoding to use for file names, use null
095         * for the platform's default encoding
096         * @param useUnicodeExtraFields whether to use InfoZIP Unicode
097         * Extra Fields (if present) to set the file names.
098         */
099        public ZipArchiveInputStream(InputStream inputStream,
100                                     String encoding,
101                                     boolean useUnicodeExtraFields) {
102            zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
103            this.useUnicodeExtraFields = useUnicodeExtraFields;
104            in = new PushbackInputStream(inputStream, buf.length);
105        }
106    
107        public ZipArchiveEntry getNextZipEntry() throws IOException {
108            if (closed || hitCentralDirectory) {
109                return null;
110            }
111            if (current != null) {
112                closeEntry();
113            }
114            byte[] lfh = new byte[LFH_LEN];
115            try {
116                readFully(lfh);
117            } catch (EOFException e) {
118                return null;
119            }
120            ZipLong sig = new ZipLong(lfh);
121            if (sig.equals(ZipLong.CFH_SIG)) {
122                hitCentralDirectory = true;
123                return null;
124            }
125            if (!sig.equals(ZipLong.LFH_SIG)) {
126                return null;
127            }
128    
129            int off = WORD;
130            current = new ZipArchiveEntry();
131    
132            int versionMadeBy = ZipShort.getValue(lfh, off);
133            off += SHORT;
134            current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
135                                & ZipFile.NIBLET_MASK);
136    
137            final int generalPurposeFlag = ZipShort.getValue(lfh, off);
138            final boolean hasEFS = 
139                (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
140            final ZipEncoding entryEncoding =
141                hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
142            hasDataDescriptor = (generalPurposeFlag & 8) != 0;
143    
144            off += SHORT;
145    
146            current.setMethod(ZipShort.getValue(lfh, off));
147            off += SHORT;
148    
149            long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
150            current.setTime(time);
151            off += WORD;
152    
153            if (!hasDataDescriptor) {
154                current.setCrc(ZipLong.getValue(lfh, off));
155                off += WORD;
156    
157                current.setCompressedSize(ZipLong.getValue(lfh, off));
158                off += WORD;
159    
160                current.setSize(ZipLong.getValue(lfh, off));
161                off += WORD;
162            } else {
163                off += 3 * WORD;
164            }
165    
166            int fileNameLen = ZipShort.getValue(lfh, off);
167    
168            off += SHORT;
169    
170            int extraLen = ZipShort.getValue(lfh, off);
171            off += SHORT;
172    
173            byte[] fileName = new byte[fileNameLen];
174            readFully(fileName);
175            current.setName(entryEncoding.decode(fileName));
176    
177            byte[] extraData = new byte[extraLen];
178            readFully(extraData);
179            current.setExtra(extraData);
180    
181            if (!hasEFS && useUnicodeExtraFields) {
182                ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
183            }
184            return current;
185        }
186    
187        public ArchiveEntry getNextEntry() throws IOException {
188            return getNextZipEntry();
189        }
190    
191        public int read(byte[] buffer, int start, int length) throws IOException {
192            if (closed) {
193                throw new IOException("The stream is closed");
194            }
195            if (inf.finished() || current == null) {
196                return -1;
197            }
198    
199            // avoid int overflow, check null buffer
200            if (start <= buffer.length && length >= 0 && start >= 0
201                && buffer.length - start >= length) {
202                if (current.getMethod() == ZipArchiveOutputStream.STORED) {
203                    int csize = (int) current.getSize();
204                    if (readBytesOfEntry >= csize) {
205                        return -1;
206                    }
207                    if (offsetInBuffer >= lengthOfLastRead) {
208                        offsetInBuffer = 0;
209                        if ((lengthOfLastRead = in.read(buf)) == -1) {
210                            return -1;
211                        }
212                        count(lengthOfLastRead);
213                        bytesReadFromStream += lengthOfLastRead;
214                    }
215                    int toRead = length > lengthOfLastRead
216                        ? lengthOfLastRead - offsetInBuffer
217                        : length;
218                    if ((csize - readBytesOfEntry) < toRead) {
219                        toRead = csize - readBytesOfEntry;
220                    }
221                    System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
222                    offsetInBuffer += toRead;
223                    readBytesOfEntry += toRead;
224                    crc.update(buffer, start, toRead);
225                    return toRead;
226                }
227                if (inf.needsInput()) {
228                    fill();
229                    if (lengthOfLastRead > 0) {
230                        bytesReadFromStream += lengthOfLastRead;
231                    }
232                }
233                int read = 0;
234                try {
235                    read = inf.inflate(buffer, start, length);
236                } catch (DataFormatException e) {
237                    throw new ZipException(e.getMessage());
238                }
239                if (read == 0) {
240                    if (inf.finished()) {
241                        return -1;
242                    } else if (lengthOfLastRead == -1) {
243                        throw new IOException("Truncated ZIP file");
244                    }
245                }
246                crc.update(buffer, start, read);
247                return read;
248            }
249            throw new ArrayIndexOutOfBoundsException();
250        }
251    
252        public void close() throws IOException {
253            if (!closed) {
254                closed = true;
255                in.close();
256            }
257        }
258    
259        public long skip(long value) throws IOException {
260            if (value >= 0) {
261                long skipped = 0;
262                byte[] b = new byte[1024];
263                while (skipped != value) {
264                    long rem = value - skipped;
265                    int x = read(b, 0, (int) (b.length > rem ? rem : b.length));
266                    if (x == -1) {
267                        return skipped;
268                    }
269                    skipped += x;
270                }
271                return skipped;
272            }
273            throw new IllegalArgumentException();
274        }
275    
276        /*
277         *  This test assumes that the zip file does not have any additional leading content,
278         *  which is something that is allowed by the specification (e.g. self-extracting zips)
279         */
280        public static boolean matches(byte[] signature, int length) {
281            if (length < ZipArchiveOutputStream.LFH_SIG.length) {
282                return false;
283            }
284    
285            return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
286                || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
287        }
288    
289        private static boolean checksig(byte[] signature, byte[] expected){
290            for (int i = 0; i < expected.length; i++) {
291                if (signature[i] != expected[i]) {
292                    return false;
293                }
294            }
295            return true;        
296        }
297    
298        private void closeEntry() throws IOException {
299            if (closed) {
300                throw new IOException("The stream is closed");
301            }
302            if (current == null) {
303                return;
304            }
305            // Ensure all entry bytes are read
306            skip(Long.MAX_VALUE);
307            int inB;
308            if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
309                inB = inf.getTotalIn();
310            } else {
311                inB = readBytesOfEntry;
312            }
313            int diff = 0;
314    
315            // Pushback any required bytes
316            if ((diff = bytesReadFromStream - inB) != 0) {
317                ((PushbackInputStream) in).unread(buf,
318                                                  lengthOfLastRead - diff, diff);
319            }
320    
321            if (hasDataDescriptor) {
322                readFully(new byte[4 * WORD]);
323            }
324    
325            inf.reset();
326            readBytesOfEntry = offsetInBuffer = bytesReadFromStream =
327                lengthOfLastRead = 0;
328            crc.reset();
329            current = null;
330        }
331    
332        private void fill() throws IOException {
333            if (closed) {
334                throw new IOException("The stream is closed");
335            }
336            if ((lengthOfLastRead = in.read(buf)) > 0) {
337                inf.setInput(buf, 0, lengthOfLastRead);
338            }
339        }
340    
341        private void readFully(byte[] b) throws IOException {
342            int count = 0, x = 0;
343            while (count != b.length) {
344                count += x = in.read(b, count, b.length - count);
345                if (x == -1) {
346                    throw new EOFException();
347                }
348            }
349        }
350    }