View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.io;
18   
19  import java.io.ByteArrayInputStream;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.OutputStream;
23  import java.io.SequenceInputStream;
24  import java.io.UnsupportedEncodingException;
25  import java.util.ArrayList;
26  import java.util.Collections;
27  import java.util.List;
28  
29  /**
30   * This class implements an output stream in which the data is 
31   * written into a byte array. The buffer automatically grows as data 
32   * is written to it.
33   * <p> 
34   * The data can be retrieved using <code>toByteArray()</code> and
35   * <code>toString()</code>.
36   * <p>
37   * Closing a <tt>ByteArrayOutputStream</tt> has no effect. The methods in
38   * this class can be called after the stream has been closed without
39   * generating an <tt>IOException</tt>.
40   * <p>
41   * This is an alternative implementation of the java.io.ByteArrayOutputStream
42   * class. The original implementation only allocates 32 bytes at the beginning.
43   * As this class is designed for heavy duty it starts at 1024 bytes. In contrast
44   * to the original it doesn't reallocate the whole memory block but allocates
45   * additional buffers. This way no buffers need to be garbage collected and
46   * the contents don't have to be copied to the new buffer. This class is
47   * designed to behave exactly like the original. The only exception is the
48   * deprecated toString(int) method that has been ignored.
49   * 
50   * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
51   * @author Holger Hoffstatte
52   * @since Apache Tika 0.4, copied from Commons IO 1.4
53   */
54  public class ByteArrayOutputStream extends OutputStream {
55  
56      /** A singleton empty byte array. */
57      private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
58  
59      /** The list of buffers, which grows and never reduces. */
60      private final List<byte[]> buffers = new ArrayList<byte[]>();
61      /** The index of the current buffer. */
62      private int currentBufferIndex;
63      /** The total count of bytes in all the filled buffers. */
64      private int filledBufferSum;
65      /** The current buffer. */
66      private byte[] currentBuffer;
67      /** The total count of bytes written. */
68      private int count;
69  
70      /**
71       * Creates a new byte array output stream. The buffer capacity is 
72       * initially 1024 bytes, though its size increases if necessary. 
73       */
74      public ByteArrayOutputStream() {
75          this(1024);
76      }
77  
78      /**
79       * Creates a new byte array output stream, with a buffer capacity of 
80       * the specified size, in bytes. 
81       *
82       * @param size  the initial size
83       * @throws IllegalArgumentException if size is negative
84       */
85      public ByteArrayOutputStream(int size) {
86          if (size < 0) {
87              throw new IllegalArgumentException(
88                  "Negative initial size: " + size);
89          }
90          needNewBuffer(size);
91      }
92  
93      /**
94       * Makes a new buffer available either by allocating
95       * a new one or re-cycling an existing one.
96       *
97       * @param newcount  the size of the buffer if one is created
98       */
99      private void needNewBuffer(int newcount) {
100         if (currentBufferIndex < buffers.size() - 1) {
101             //Recycling old buffer
102             filledBufferSum += currentBuffer.length;
103             
104             currentBufferIndex++;
105             currentBuffer = buffers.get(currentBufferIndex);
106         } else {
107             //Creating new buffer
108             int newBufferSize;
109             if (currentBuffer == null) {
110                 newBufferSize = newcount;
111                 filledBufferSum = 0;
112             } else {
113                 newBufferSize = Math.max(
114                     currentBuffer.length << 1, 
115                     newcount - filledBufferSum);
116                 filledBufferSum += currentBuffer.length;
117             }
118             
119             currentBufferIndex++;
120             currentBuffer = new byte[newBufferSize];
121             buffers.add(currentBuffer);
122         }
123     }
124 
125     /**
126      * Write the bytes to byte array.
127      * @param b the bytes to write
128      * @param off The start offset
129      * @param len The number of bytes to write
130      */
131     @Override
132     public void write(byte[] b, int off, int len) {
133         if ((off < 0) 
134                 || (off > b.length) 
135                 || (len < 0) 
136                 || ((off + len) > b.length) 
137                 || ((off + len) < 0)) {
138             throw new IndexOutOfBoundsException();
139         } else if (len == 0) {
140             return;
141         }
142         synchronized (this) {
143             int newcount = count + len;
144             int remaining = len;
145             int inBufferPos = count - filledBufferSum;
146             while (remaining > 0) {
147                 int part = Math.min(remaining, currentBuffer.length - inBufferPos);
148                 System.arraycopy(b, off + len - remaining, currentBuffer, inBufferPos, part);
149                 remaining -= part;
150                 if (remaining > 0) {
151                     needNewBuffer(newcount);
152                     inBufferPos = 0;
153                 }
154             }
155             count = newcount;
156         }
157     }
158 
159     /**
160      * Write a byte to byte array.
161      * @param b the byte to write
162      */
163     @Override
164     public synchronized void write(int b) {
165         int inBufferPos = count - filledBufferSum;
166         if (inBufferPos == currentBuffer.length) {
167             needNewBuffer(count + 1);
168             inBufferPos = 0;
169         }
170         currentBuffer[inBufferPos] = (byte) b;
171         count++;
172     }
173 
174     /**
175      * Writes the entire contents of the specified input stream to this
176      * byte stream. Bytes from the input stream are read directly into the
177      * internal buffers of this streams.
178      *
179      * @param in the input stream to read from
180      * @return total number of bytes read from the input stream
181      *         (and written to this stream)
182      * @throws IOException if an I/O error occurs while reading the input stream
183      * @since Commons IO 1.4
184      */
185     public synchronized int write(InputStream in) throws IOException {
186         int readCount = 0;
187         int inBufferPos = count - filledBufferSum;
188         int n = in.read(currentBuffer, inBufferPos, currentBuffer.length - inBufferPos);
189         while (n != -1) {
190             readCount += n;
191             inBufferPos += n;
192             count += n;
193             if (inBufferPos == currentBuffer.length) {
194                 needNewBuffer(currentBuffer.length);
195                 inBufferPos = 0;
196             }
197             n = in.read(currentBuffer, inBufferPos, currentBuffer.length - inBufferPos);
198         }
199         return readCount;
200     }
201 
202     /**
203      * Return the current size of the byte array.
204      * @return the current size of the byte array
205      */
206     public synchronized int size() {
207         return count;
208     }
209 
210     /**
211      * Closing a <tt>ByteArrayOutputStream</tt> has no effect. The methods in
212      * this class can be called after the stream has been closed without
213      * generating an <tt>IOException</tt>.
214      *
215      * @throws IOException never (this method should not declare this exception
216      * but it has to now due to backwards compatability)
217      */
218     @Override
219     public void close() throws IOException {
220         //nop
221     }
222 
223     /**
224      * @see java.io.ByteArrayOutputStream#reset()
225      */
226     public synchronized void reset() {
227         count = 0;
228         filledBufferSum = 0;
229         currentBufferIndex = 0;
230         currentBuffer = buffers.get(currentBufferIndex);
231     }
232 
233     /**
234      * Writes the entire contents of this byte stream to the
235      * specified output stream.
236      *
237      * @param out  the output stream to write to
238      * @throws IOException if an I/O error occurs, such as if the stream is closed
239      * @see java.io.ByteArrayOutputStream#writeTo(OutputStream)
240      */
241     public synchronized void writeTo(OutputStream out) throws IOException {
242         int remaining = count;
243         for (byte[] buf : buffers) {
244             int c = Math.min(buf.length, remaining);
245             out.write(buf, 0, c);
246             remaining -= c;
247             if (remaining == 0) {
248                 break;
249             }
250         }
251     }
252 
253     /**
254      * Fetches entire contents of an <code>InputStream</code> and represent
255      * same data as result InputStream.
256      * <p>
257      * This method is useful where,
258      * <ul>
259      * <li>Source InputStream is slow.</li>
260      * <li>It has network resources associated, so we cannot keep it open for
261      * long time.</li>
262      * <li>It has network timeout associated.</li>
263      * </ul>
264      * It can be used in favor of {@link #toByteArray()}, since it
265      * avoids unnecessary allocation and copy of byte[].<br>
266      * This method buffers the input internally, so there is no need to use a
267      * <code>BufferedInputStream</code>.
268      * 
269      * @param input Stream to be fully buffered.
270      * @return A fully buffered stream.
271      * @throws IOException if an I/O error occurs
272      */
273     public static InputStream toBufferedInputStream(InputStream input)
274             throws IOException {
275         ByteArrayOutputStream output = new ByteArrayOutputStream();
276         output.write(input);
277         return output.toBufferedInputStream();
278     }
279 
280     /**
281      * Gets the current contents of this byte stream as a Input Stream. The
282      * returned stream is backed by buffers of <code>this</code> stream,
283      * avoiding memory allocation and copy, thus saving space and time.<br>
284      * 
285      * @return the current contents of this output stream.
286      * @see java.io.ByteArrayOutputStream#toByteArray()
287      * @see #reset()
288      * @since Commons IO 2.0
289      */
290     private InputStream toBufferedInputStream() {
291         int remaining = count;
292         if (remaining == 0) {
293             return new ClosedInputStream();
294         }
295         List<ByteArrayInputStream> list = new ArrayList<ByteArrayInputStream>(buffers.size());
296         for (byte[] buf : buffers) {
297             int c = Math.min(buf.length, remaining);
298             list.add(new ByteArrayInputStream(buf, 0, c));
299             remaining -= c;
300             if (remaining == 0) {
301                 break;
302             }
303         }
304         return new SequenceInputStream(Collections.enumeration(list));
305     }
306 
307     /**
308      * Gets the curent contents of this byte stream as a byte array.
309      * The result is independent of this stream.
310      *
311      * @return the current contents of this output stream, as a byte array
312      * @see java.io.ByteArrayOutputStream#toByteArray()
313      */
314     public synchronized byte[] toByteArray() {
315         int remaining = count;
316         if (remaining == 0) {
317             return EMPTY_BYTE_ARRAY; 
318         }
319         byte newbuf[] = new byte[remaining];
320         int pos = 0;
321         for (byte[] buf : buffers) {
322             int c = Math.min(buf.length, remaining);
323             System.arraycopy(buf, 0, newbuf, pos, c);
324             pos += c;
325             remaining -= c;
326             if (remaining == 0) {
327                 break;
328             }
329         }
330         return newbuf;
331     }
332 
333     /**
334      * Gets the curent contents of this byte stream as a string.
335      * @return the contents of the byte array as a String
336      * @see java.io.ByteArrayOutputStream#toString()
337      */
338     @Override
339     public String toString() {
340         return new String(toByteArray());
341     }
342 
343     /**
344      * Gets the curent contents of this byte stream as a string
345      * using the specified encoding.
346      *
347      * @param enc  the name of the character encoding
348      * @return the string converted from the byte array
349      * @throws UnsupportedEncodingException if the encoding is not supported
350      * @see java.io.ByteArrayOutputStream#toString(String)
351      */
352     public String toString(String enc) throws UnsupportedEncodingException {
353         return new String(toByteArray(), enc);
354     }
355 
356 }