View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.mp3;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.UnsupportedEncodingException;
22  
23  import org.apache.tika.exception.TikaException;
24  import org.xml.sax.ContentHandler;
25  import org.xml.sax.SAXException;
26  
27  /**
28   * This is used to parse ID3 Version 1 Tag information from an MP3 file, 
29   * if available.
30   *
31   * @see <a href="http://www.id3.org/ID3v1">MP3 ID3 Version 1 specification</a>
32   */
33  public class ID3v1Handler implements ID3Tags {
34      private String title;
35      private String artist;
36      private String album;
37      private String year;
38      private String comment;
39      private String genre;
40      private String trackNumber;
41  
42      boolean found = false;
43  
44      public ID3v1Handler(InputStream stream, ContentHandler handler)
45              throws IOException, SAXException, TikaException {
46          this(getSuffix(stream, 128));
47      }
48  
49      /**
50       * Creates from the last 128 bytes of a stream.
51       * @param tagData Must be the last 128 bytes 
52       */
53      protected ID3v1Handler(byte[] tagData)
54              throws IOException, SAXException, TikaException {
55          if (tagData.length == 128
56                  && tagData[0] == 'T' && tagData[1] == 'A' && tagData[2] == 'G') {
57              found = true;
58  
59              title = getString(tagData, 3, 33);
60              artist = getString(tagData, 33, 63);
61              album = getString(tagData, 63, 93);
62              year = getString(tagData, 93, 97);
63              comment = getString(tagData, 97, 127);
64  
65              int genreID = (int) tagData[127] & 0xff; // unsigned byte
66              genre = GENRES[Math.min(genreID, GENRES.length - 1)];
67  
68              // ID3v1.1 Track addition
69              // If the last two bytes of the comment field are zero and
70              // non-zero, then the last byte is the track number
71              if (tagData[125] == 0 && tagData[126] != 0) {
72                  int trackNum = (int) tagData[126] & 0xff;
73                  trackNumber = Integer.toString(trackNum);
74              }
75          }
76      }
77  
78  
79      public boolean getTagsPresent() {
80          return found;
81      }
82  
83      public String getTitle() {
84          return title;
85      }
86  
87      public String getArtist() {
88          return artist;
89      }
90  
91      public String getAlbum() {
92          return album;
93      }
94  
95      public String getYear() {
96          return year;
97      }
98  
99      public String getComment() {
100         return comment;
101     }
102 
103     public String getGenre() {
104         return genre;
105     }
106 
107     public String getTrackNumber() {
108         return trackNumber;
109     }
110 
111     /**
112      * Returns the identified ISO-8859-1 substring from the given byte buffer.
113      * The return value is the zero-terminated substring retrieved from
114      * between the given start and end positions in the given byte buffer.
115      * Extra whitespace (and control characters) from the beginning and the
116      * end of the substring is removed.
117      *
118      * @param buffer byte buffer
119      * @param start start index of the substring
120      * @param end end index of the substring
121      * @return the identified substring
122      * @throws TikaException if the ISO-8859-1 encoding is not available
123      */
124     private static String getString(byte[] buffer, int start, int end)
125             throws TikaException {
126         // Find the zero byte that marks the end of the string
127         int zero = start;
128         while (zero < end && buffer[zero] != 0) {
129             zero++;
130         }
131 
132         // Skip trailing whitespace
133         end = zero;
134         while (start < end && buffer[end - 1] <= ' ') {
135             end--;
136         }
137 
138         // Skip leading whitespace
139         while (start < end && buffer[start] <= ' ') {
140             start++;
141         }
142 
143         // Return the remaining substring
144         try {
145             return new String(buffer, start, end - start, "ISO-8859-1");
146         } catch (UnsupportedEncodingException e) {
147             throw new TikaException("ISO-8859-1 encoding is not available", e);
148         }
149     }
150 
151     /**
152      * Reads and returns the last <code>length</code> bytes from the
153      * given stream.
154      * @param stream input stream
155      * @param length number of bytes from the end to read and return
156      * @return stream the <code>InputStream</code> to read from.
157      * @throws IOException if the stream could not be read from.
158      */
159     private static byte[] getSuffix(InputStream stream, int length)
160             throws IOException {
161         byte[] buffer = new byte[2 * length];
162         int bytesInBuffer = 0;
163 
164         int n = stream.read(buffer);
165         while (n != -1) {
166             bytesInBuffer += n;
167             if (bytesInBuffer == buffer.length) {
168                 System.arraycopy(buffer, bytesInBuffer - length, buffer, 0, length);
169                 bytesInBuffer = length;
170             }
171             n = stream.read(buffer, bytesInBuffer, buffer.length - bytesInBuffer);
172         }
173 
174         if (bytesInBuffer < length) {
175             length = bytesInBuffer;
176         }
177 
178         byte[] result = new byte[length];
179         System.arraycopy(buffer, bytesInBuffer - length, result, 0, length);
180         return result;
181     }
182 
183 }