View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.mp3;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.UnsupportedEncodingException;
22  import java.util.Iterator;
23  
24  /**
25   * A frame of ID3v2 data, which is then passed to a handler to 
26   * be turned into useful data.
27   */
28  public class ID3v2Frame implements MP3Frame {
29      private int majorVersion;
30      private int minorVersion;
31      private int flags;
32      private int length;
33      /** Excludes the header size part */
34      private byte[] extendedHeader;
35      private byte[] data;
36  
37      public int getMajorVersion() {
38          return majorVersion;
39      }
40  
41      public int getMinorVersion() {
42          return minorVersion;
43      }
44  
45      public int getFlags() {
46          return flags;
47      }
48  
49      public int getLength() {
50          return length;
51      }
52  
53      public byte[] getExtendedHeader() {
54          return extendedHeader;
55      }
56  
57      public byte[] getData() {
58          return data;
59      }
60  
61      /**
62       * Returns the next Frame (ID3v2 or Audio) in
63       *  the file, or null if the next batch of data
64       *  doesn't correspond to either an ID3v2 Frame
65       *  or an Audio Frame.
66       * ID3v2 Frames should come before all Audio ones.
67       */
68      public static MP3Frame createFrameIfPresent(InputStream inp)
69              throws IOException {
70          int h1 = inp.read();
71          int h2 = inp.read();
72          int h3 = inp.read();
73          
74          // Is it an ID3v2 Frame? 
75          if (h1 == (int)'I' && h2 == (int)'D' && h3 == (int)'3') {
76              int majorVersion = inp.read();
77              int minorVersion = inp.read();
78              if (majorVersion == -1 || minorVersion == -1) {
79                  return null;
80              }
81              return new ID3v2Frame(majorVersion, minorVersion, inp);
82          }
83          
84          // Is it an Audio Frame?
85          int h4 = inp.read();
86          if (AudioFrame.isAudioHeader(h1, h2, h3, h4)) {
87              return new AudioFrame(h1, h2, h3, h4, inp);
88          }
89  
90          // Not a frame header
91          return null;
92      }
93  
94      private ID3v2Frame(int majorVersion, int minorVersion, InputStream inp)
95              throws IOException {
96          this.majorVersion = majorVersion;
97          this.minorVersion = minorVersion;
98  
99          // Get the flags and the length
100         flags = inp.read();
101         length = get7BitsInt(readFully(inp, 4), 0);
102 
103         // Do we have an extended header?
104         if ((flags & 0x02) == 0x02) {
105             int size = getInt(readFully(inp, 4));
106             extendedHeader = readFully(inp, size);
107         }
108 
109         // Get the frame's data
110         data = readFully(inp, length);
111     }
112 
113     protected static int getInt(byte[] data) {
114         return getInt(data, 0);
115     }
116 
117     protected static int getInt(byte[] data, int offset) {
118         int b0 = data[offset+0] & 0xFF;
119         int b1 = data[offset+1] & 0xFF;
120         int b2 = data[offset+2] & 0xFF;
121         int b3 = data[offset+3] & 0xFF;
122         return (b0 << 24) + (b1 << 16) + (b2 << 8) + (b3 << 0);
123     }
124 
125     protected static int getInt3(byte[] data, int offset) {
126         int b0 = data[offset+0] & 0xFF;
127         int b1 = data[offset+1] & 0xFF;
128         int b2 = data[offset+2] & 0xFF;
129         return (b0 << 16) + (b1 << 8) + (b2 << 0);
130     }
131 
132     protected static int getInt2(byte[] data, int offset) {
133         int b0 = data[offset+0] & 0xFF;
134         int b1 = data[offset+1] & 0xFF;
135         return (b0 << 8) + (b1 << 0);
136     }
137 
138     /**
139      * AKA a Synchsafe integer.
140      * 4 bytes hold a 28 bit number. The highest
141      *  bit in each byte is always 0 and always ignored.
142      */
143     protected static int get7BitsInt(byte[] data, int offset) {
144         int b0 = data[offset+0] & 0x7F;
145         int b1 = data[offset+1] & 0x7F;
146         int b2 = data[offset+2] & 0x7F;
147         int b3 = data[offset+3] & 0x7F;
148         return (b0 << 21) + (b1 << 14) + (b2 << 7) + (b3 << 0);
149     }
150 
151     protected static byte[] readFully(InputStream inp, int length)
152             throws IOException {
153         byte[] b = new byte[length];
154 
155         int pos = 0;
156         int read;
157         while (pos < length) {
158             read = inp.read(b, pos, length-pos);
159             if (read == -1) {
160                 throw new IOException("Tried to read " + length + " bytes, but only " + pos + " bytes present"); 
161             }
162             pos += read;
163         }
164 
165         return b;
166     }
167 
168     /**
169      * Returns the (possibly null padded) String at the given offset and
170      * length. String encoding is held in the first byte; 
171      */
172     protected static String getTagString(byte[] data, int offset, int length) {
173         int actualLength = length;
174         while (data[actualLength-1] == 0) {
175             actualLength--;
176         }
177 
178         // Does it have an encoding flag?
179         // Detect by the first byte being sub 0x20
180         String encoding = "ISO-8859-1";
181         byte maybeEncodingFlag = data[offset];
182         if (maybeEncodingFlag == 0 || maybeEncodingFlag == 1) {
183             offset++;
184             actualLength--;
185             if (maybeEncodingFlag == 1) {
186                 // With BOM
187                 encoding = "UTF-16";
188             } else if (maybeEncodingFlag == 2) {
189                 // Without BOM
190                 encoding = "UTF-16BE";
191             } else if (maybeEncodingFlag == 3) {
192                 encoding = "UTF8";
193             }
194         }
195 
196         try {
197             return new String(data, offset, actualLength, encoding);
198         } catch (UnsupportedEncodingException e) {
199             throw new RuntimeException(
200                     "Core encoding " + encoding + " is not available", e);
201         }
202     }
203 
204     /**
205      * Returns the String at the given
206      *  offset and length. Strings are ISO-8859-1 
207      */
208     protected static String getString(byte[] data, int offset, int length) {
209         try {
210             return new String(data, offset, length, "ISO-8859-1");
211         } catch (UnsupportedEncodingException e) {
212             throw new RuntimeException(
213                     "Core encoding ISO-8859-1 encoding is not available", e);
214         }
215     }
216 
217 
218     /**
219      * Iterates over id3v2 raw tags.
220      * Create an instance of this that configures the
221      *  various length and multipliers.
222      */
223     protected class RawTagIterator implements Iterator<RawTag> {
224         private int nameLength;
225         private int sizeLength;
226         private int sizeMultiplier;
227         private int flagLength;
228 
229         private int offset = 0;
230 
231         protected RawTagIterator(
232                 int nameLength, int sizeLength, int sizeMultiplier,
233                 int flagLength) {
234             this.nameLength = nameLength;
235             this.sizeLength = sizeLength;
236             this.sizeMultiplier = sizeMultiplier;
237             this.flagLength = flagLength;
238         }
239 
240         public boolean hasNext() {
241             // Check for padding at the end
242             return offset < data.length && data[offset] != 0;
243         }
244 
245         public RawTag next() {
246             RawTag tag = new RawTag(nameLength, sizeLength, sizeMultiplier,
247                     flagLength, data, offset);
248             offset += tag.getSize();
249             return tag;
250         }
251 
252         public void remove() {
253         }
254 
255     }
256 
257     protected static class RawTag {
258         private int headerSize;
259         protected String name;
260         protected int flag;
261         protected byte[] data;
262 
263         private RawTag(
264                 int nameLength, int sizeLength, int sizeMultiplier,
265                 int flagLength, byte[] frameData, int offset) {
266             headerSize = nameLength + sizeLength + flagLength;
267 
268             // Name, normally 3 or 4 bytes
269             name = getString(frameData, offset, nameLength);
270 
271             // Size
272             int rawSize;
273             if (sizeLength == 3) {
274                 rawSize = getInt3(frameData, offset+nameLength);
275             } else {
276                 rawSize = getInt(frameData, offset+nameLength);
277             }
278             int size = rawSize * sizeMultiplier;
279 
280             // Flag
281             if (flagLength > 0) {
282                 if (flagLength == 1) {
283                     flag = (int)frameData[offset+nameLength+sizeLength];
284                 } else {
285                     flag = getInt2(frameData, offset+nameLength+sizeLength);
286                 }
287             }
288 
289             // Now data
290             data = new byte[size];
291             System.arraycopy(frameData, 
292                     offset+nameLength+sizeLength+flagLength, data, 0, size);
293         }
294 
295         protected int getSize() {
296             return headerSize + data.length;
297         }
298 
299     }
300 
301 }