1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.tika.io;
18
19 import java.io.ByteArrayInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.OutputStream;
23 import java.io.SequenceInputStream;
24 import java.io.UnsupportedEncodingException;
25 import java.util.ArrayList;
26 import java.util.Collections;
27 import java.util.List;
28
29 /**
30 * This class implements an output stream in which the data is
31 * written into a byte array. The buffer automatically grows as data
32 * is written to it.
33 * <p>
34 * The data can be retrieved using <code>toByteArray()</code> and
35 * <code>toString()</code>.
36 * <p>
37 * Closing a <tt>ByteArrayOutputStream</tt> has no effect. The methods in
38 * this class can be called after the stream has been closed without
39 * generating an <tt>IOException</tt>.
40 * <p>
41 * This is an alternative implementation of the java.io.ByteArrayOutputStream
42 * class. The original implementation only allocates 32 bytes at the beginning.
43 * As this class is designed for heavy duty it starts at 1024 bytes. In contrast
44 * to the original it doesn't reallocate the whole memory block but allocates
45 * additional buffers. This way no buffers need to be garbage collected and
46 * the contents don't have to be copied to the new buffer. This class is
47 * designed to behave exactly like the original. The only exception is the
48 * deprecated toString(int) method that has been ignored.
49 *
50 * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
51 * @author Holger Hoffstatte
52 * @since Apache Tika 0.4, copied from Commons IO 1.4
53 */
54 public class ByteArrayOutputStream extends OutputStream {
55
56 /** A singleton empty byte array. */
57 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
58
59 /** The list of buffers, which grows and never reduces. */
60 private final List<byte[]> buffers = new ArrayList<byte[]>();
61 /** The index of the current buffer. */
62 private int currentBufferIndex;
63 /** The total count of bytes in all the filled buffers. */
64 private int filledBufferSum;
65 /** The current buffer. */
66 private byte[] currentBuffer;
67 /** The total count of bytes written. */
68 private int count;
69
70 /**
71 * Creates a new byte array output stream. The buffer capacity is
72 * initially 1024 bytes, though its size increases if necessary.
73 */
74 public ByteArrayOutputStream() {
75 this(1024);
76 }
77
78 /**
79 * Creates a new byte array output stream, with a buffer capacity of
80 * the specified size, in bytes.
81 *
82 * @param size the initial size
83 * @throws IllegalArgumentException if size is negative
84 */
85 public ByteArrayOutputStream(int size) {
86 if (size < 0) {
87 throw new IllegalArgumentException(
88 "Negative initial size: " + size);
89 }
90 needNewBuffer(size);
91 }
92
93 /**
94 * Makes a new buffer available either by allocating
95 * a new one or re-cycling an existing one.
96 *
97 * @param newcount the size of the buffer if one is created
98 */
99 private void needNewBuffer(int newcount) {
100 if (currentBufferIndex < buffers.size() - 1) {
101 //Recycling old buffer
102 filledBufferSum += currentBuffer.length;
103
104 currentBufferIndex++;
105 currentBuffer = buffers.get(currentBufferIndex);
106 } else {
107 //Creating new buffer
108 int newBufferSize;
109 if (currentBuffer == null) {
110 newBufferSize = newcount;
111 filledBufferSum = 0;
112 } else {
113 newBufferSize = Math.max(
114 currentBuffer.length << 1,
115 newcount - filledBufferSum);
116 filledBufferSum += currentBuffer.length;
117 }
118
119 currentBufferIndex++;
120 currentBuffer = new byte[newBufferSize];
121 buffers.add(currentBuffer);
122 }
123 }
124
125 /**
126 * Write the bytes to byte array.
127 * @param b the bytes to write
128 * @param off The start offset
129 * @param len The number of bytes to write
130 */
131 @Override
132 public void write(byte[] b, int off, int len) {
133 if ((off < 0)
134 || (off > b.length)
135 || (len < 0)
136 || ((off + len) > b.length)
137 || ((off + len) < 0)) {
138 throw new IndexOutOfBoundsException();
139 } else if (len == 0) {
140 return;
141 }
142 synchronized (this) {
143 int newcount = count + len;
144 int remaining = len;
145 int inBufferPos = count - filledBufferSum;
146 while (remaining > 0) {
147 int part = Math.min(remaining, currentBuffer.length - inBufferPos);
148 System.arraycopy(b, off + len - remaining, currentBuffer, inBufferPos, part);
149 remaining -= part;
150 if (remaining > 0) {
151 needNewBuffer(newcount);
152 inBufferPos = 0;
153 }
154 }
155 count = newcount;
156 }
157 }
158
159 /**
160 * Write a byte to byte array.
161 * @param b the byte to write
162 */
163 @Override
164 public synchronized void write(int b) {
165 int inBufferPos = count - filledBufferSum;
166 if (inBufferPos == currentBuffer.length) {
167 needNewBuffer(count + 1);
168 inBufferPos = 0;
169 }
170 currentBuffer[inBufferPos] = (byte) b;
171 count++;
172 }
173
174 /**
175 * Writes the entire contents of the specified input stream to this
176 * byte stream. Bytes from the input stream are read directly into the
177 * internal buffers of this streams.
178 *
179 * @param in the input stream to read from
180 * @return total number of bytes read from the input stream
181 * (and written to this stream)
182 * @throws IOException if an I/O error occurs while reading the input stream
183 * @since Commons IO 1.4
184 */
185 public synchronized int write(InputStream in) throws IOException {
186 int readCount = 0;
187 int inBufferPos = count - filledBufferSum;
188 int n = in.read(currentBuffer, inBufferPos, currentBuffer.length - inBufferPos);
189 while (n != -1) {
190 readCount += n;
191 inBufferPos += n;
192 count += n;
193 if (inBufferPos == currentBuffer.length) {
194 needNewBuffer(currentBuffer.length);
195 inBufferPos = 0;
196 }
197 n = in.read(currentBuffer, inBufferPos, currentBuffer.length - inBufferPos);
198 }
199 return readCount;
200 }
201
202 /**
203 * Return the current size of the byte array.
204 * @return the current size of the byte array
205 */
206 public synchronized int size() {
207 return count;
208 }
209
210 /**
211 * Closing a <tt>ByteArrayOutputStream</tt> has no effect. The methods in
212 * this class can be called after the stream has been closed without
213 * generating an <tt>IOException</tt>.
214 *
215 * @throws IOException never (this method should not declare this exception
216 * but it has to now due to backwards compatability)
217 */
218 @Override
219 public void close() throws IOException {
220 //nop
221 }
222
223 /**
224 * @see java.io.ByteArrayOutputStream#reset()
225 */
226 public synchronized void reset() {
227 count = 0;
228 filledBufferSum = 0;
229 currentBufferIndex = 0;
230 currentBuffer = buffers.get(currentBufferIndex);
231 }
232
233 /**
234 * Writes the entire contents of this byte stream to the
235 * specified output stream.
236 *
237 * @param out the output stream to write to
238 * @throws IOException if an I/O error occurs, such as if the stream is closed
239 * @see java.io.ByteArrayOutputStream#writeTo(OutputStream)
240 */
241 public synchronized void writeTo(OutputStream out) throws IOException {
242 int remaining = count;
243 for (byte[] buf : buffers) {
244 int c = Math.min(buf.length, remaining);
245 out.write(buf, 0, c);
246 remaining -= c;
247 if (remaining == 0) {
248 break;
249 }
250 }
251 }
252
253 /**
254 * Fetches entire contents of an <code>InputStream</code> and represent
255 * same data as result InputStream.
256 * <p>
257 * This method is useful where,
258 * <ul>
259 * <li>Source InputStream is slow.</li>
260 * <li>It has network resources associated, so we cannot keep it open for
261 * long time.</li>
262 * <li>It has network timeout associated.</li>
263 * </ul>
264 * It can be used in favor of {@link #toByteArray()}, since it
265 * avoids unnecessary allocation and copy of byte[].<br>
266 * This method buffers the input internally, so there is no need to use a
267 * <code>BufferedInputStream</code>.
268 *
269 * @param input Stream to be fully buffered.
270 * @return A fully buffered stream.
271 * @throws IOException if an I/O error occurs
272 */
273 public static InputStream toBufferedInputStream(InputStream input)
274 throws IOException {
275 ByteArrayOutputStream output = new ByteArrayOutputStream();
276 output.write(input);
277 return output.toBufferedInputStream();
278 }
279
280 /**
281 * Gets the current contents of this byte stream as a Input Stream. The
282 * returned stream is backed by buffers of <code>this</code> stream,
283 * avoiding memory allocation and copy, thus saving space and time.<br>
284 *
285 * @return the current contents of this output stream.
286 * @see java.io.ByteArrayOutputStream#toByteArray()
287 * @see #reset()
288 * @since Commons IO 2.0
289 */
290 private InputStream toBufferedInputStream() {
291 int remaining = count;
292 if (remaining == 0) {
293 return new ClosedInputStream();
294 }
295 List<ByteArrayInputStream> list = new ArrayList<ByteArrayInputStream>(buffers.size());
296 for (byte[] buf : buffers) {
297 int c = Math.min(buf.length, remaining);
298 list.add(new ByteArrayInputStream(buf, 0, c));
299 remaining -= c;
300 if (remaining == 0) {
301 break;
302 }
303 }
304 return new SequenceInputStream(Collections.enumeration(list));
305 }
306
307 /**
308 * Gets the curent contents of this byte stream as a byte array.
309 * The result is independent of this stream.
310 *
311 * @return the current contents of this output stream, as a byte array
312 * @see java.io.ByteArrayOutputStream#toByteArray()
313 */
314 public synchronized byte[] toByteArray() {
315 int remaining = count;
316 if (remaining == 0) {
317 return EMPTY_BYTE_ARRAY;
318 }
319 byte newbuf[] = new byte[remaining];
320 int pos = 0;
321 for (byte[] buf : buffers) {
322 int c = Math.min(buf.length, remaining);
323 System.arraycopy(buf, 0, newbuf, pos, c);
324 pos += c;
325 remaining -= c;
326 if (remaining == 0) {
327 break;
328 }
329 }
330 return newbuf;
331 }
332
333 /**
334 * Gets the curent contents of this byte stream as a string.
335 * @return the contents of the byte array as a String
336 * @see java.io.ByteArrayOutputStream#toString()
337 */
338 @Override
339 public String toString() {
340 return new String(toByteArray());
341 }
342
343 /**
344 * Gets the curent contents of this byte stream as a string
345 * using the specified encoding.
346 *
347 * @param enc the name of the character encoding
348 * @return the string converted from the byte array
349 * @throws UnsupportedEncodingException if the encoding is not supported
350 * @see java.io.ByteArrayOutputStream#toString(String)
351 */
352 public String toString(String enc) throws UnsupportedEncodingException {
353 return new String(toByteArray(), enc);
354 }
355
356 }