View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.mp3;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.util.ArrayList;
22  import java.util.Collections;
23  import java.util.List;
24  import java.util.Set;
25  
26  import org.apache.tika.exception.TikaException;
27  import org.apache.tika.metadata.Metadata;
28  import org.apache.tika.metadata.XMPDM;
29  import org.apache.tika.mime.MediaType;
30  import org.apache.tika.parser.ParseContext;
31  import org.apache.tika.parser.Parser;
32  import org.apache.tika.sax.XHTMLContentHandler;
33  import org.xml.sax.ContentHandler;
34  import org.xml.sax.SAXException;
35  
36  /**
37   * The <code>Mp3Parser</code> is used to parse ID3 Version 1 Tag information
38   * from an MP3 file, if available.
39   *
40   * @see <a href="http://www.id3.org/ID3v1">MP3 ID3 Version 1 specification</a>
41   * @see <a href="http://www.id3.org/id3v2.4.0-structure">MP3 ID3 Version 2.4 Structure Specification</a>
42   * @see <a href="http://www.id3.org/id3v2.4.0-frames">MP3 ID3 Version 2.4 Frames Specification</a>
43   */
44  public class Mp3Parser implements Parser {
45  
46      private static final Set<MediaType> SUPPORTED_TYPES =
47          Collections.singleton(MediaType.audio("mpeg"));
48  
49      public Set<MediaType> getSupportedTypes(ParseContext context) {
50          return SUPPORTED_TYPES;
51      }
52  
53  
54      public void parse(
55              InputStream stream, ContentHandler handler,
56              Metadata metadata, ParseContext context)
57              throws IOException, SAXException, TikaException {
58          metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");
59  
60          XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
61          xhtml.startDocument();
62  
63          // Create handlers for the various kinds of ID3 tags
64          ID3TagsAndAudio audioAndTags = getAllTagHandlers(stream, handler);
65  
66          if (audioAndTags.tags.length > 0) {
67             CompositeTagHandler tag = new CompositeTagHandler(audioAndTags.tags);
68  
69             metadata.set(Metadata.TITLE, tag.getTitle());
70             metadata.set(Metadata.AUTHOR, tag.getArtist());
71             metadata.set(XMPDM.ARTIST, tag.getArtist());
72             metadata.set(XMPDM.ALBUM, tag.getAlbum());
73             metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
74             metadata.set(XMPDM.GENRE, tag.getGenre());
75             metadata.set(XMPDM.LOG_COMMENT, tag.getComment());
76  
77             xhtml.element("h1", tag.getTitle());
78             xhtml.element("p", tag.getArtist());
79  
80              // ID3v1.1 Track addition
81              if (tag.getTrackNumber() != null) {
82                  xhtml.element("p", tag.getAlbum() + ", track " + tag.getTrackNumber());
83                  metadata.set(XMPDM.TRACK_NUMBER, tag.getTrackNumber());
84              } else {
85                  xhtml.element("p", tag.getAlbum());
86              }
87              xhtml.element("p", tag.getYear());
88              xhtml.element("p", tag.getComment());
89              xhtml.element("p", tag.getGenre());
90          }
91          if (audioAndTags.audio != null) {
92              metadata.set("samplerate", String.valueOf(audioAndTags.audio.getSampleRate()));
93              metadata.set("channels", String.valueOf(audioAndTags.audio.getChannels()));
94              metadata.set("version", audioAndTags.audio.getVersion());
95              metadata.set(
96                      XMPDM.AUDIO_SAMPLE_RATE,
97                      Integer.toString(audioAndTags.audio.getSampleRate()));
98          }
99  
100         xhtml.endDocument();
101     }
102 
103     /**
104      * @deprecated This method will be removed in Apache Tika 1.0.
105      */
106     public void parse(
107             InputStream stream, ContentHandler handler, Metadata metadata)
108             throws IOException, SAXException, TikaException {
109         parse(stream, handler, metadata, new ParseContext());
110     }
111 
112     /**
113      * Scans the MP3 frames for ID3 tags, and creates ID3Tag Handlers
114      *  for each supported set of tags. 
115      */
116     protected static ID3TagsAndAudio getAllTagHandlers(InputStream stream, ContentHandler handler)
117            throws IOException, SAXException, TikaException {
118        ID3v24Handler v24 = null;
119        ID3v23Handler v23 = null;
120        ID3v22Handler v22 = null;
121        ID3v1Handler v1 = null;
122        AudioFrame firstAudio = null;
123 
124        // ID3v2 tags live at the start of the file
125        // You can apparently have several different ID3 tag blocks
126        // So, keep going until we don't find any more
127        MP3Frame f;
128        while ((f = ID3v2Frame.createFrameIfPresent(stream)) != null && firstAudio == null) {
129            if(f instanceof ID3v2Frame) {
130                ID3v2Frame id3F = (ID3v2Frame)f;
131                if (id3F.getMajorVersion() == 4) {
132                    v24 = new ID3v24Handler(id3F);
133                } else if(id3F.getMajorVersion() == 3) {
134                    v23 = new ID3v23Handler(id3F);
135                } else if(id3F.getMajorVersion() == 2) {
136                    v22 = new ID3v22Handler(id3F);
137                }
138            } else if(f instanceof AudioFrame) {
139                firstAudio = (AudioFrame)f;
140            }
141        }
142 
143        // ID3v1 tags live at the end of the file
144        // Our handler handily seeks to the end for us
145        v1 = new ID3v1Handler(stream, handler);
146 
147        // Go in order of preference
148        // Currently, that's newest to oldest
149        List<ID3Tags> tags = new ArrayList<ID3Tags>();
150 
151        if(v24 != null && v24.getTagsPresent()) {
152           tags.add(v24);
153        }
154        if(v23 != null && v23.getTagsPresent()) {
155           tags.add(v23);
156        }
157        if(v22 != null && v22.getTagsPresent()) {
158           tags.add(v22);
159        }
160        if(v1 != null && v1.getTagsPresent()) {
161           tags.add(v1);
162        }
163        
164        ID3TagsAndAudio ret = new ID3TagsAndAudio();
165        ret.audio = firstAudio;
166        ret.tags = tags.toArray(new ID3Tags[tags.size()]);
167        return ret;
168     }
169 
170     protected static class ID3TagsAndAudio {
171         private ID3Tags[] tags;
172         private AudioFrame audio;
173     }
174 
175 }