View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.audio;
18  
19  import java.io.BufferedInputStream;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.util.Arrays;
23  import java.util.Collections;
24  import java.util.HashSet;
25  import java.util.Set;
26  import java.util.Map.Entry;
27  
28  import javax.sound.sampled.AudioFileFormat;
29  import javax.sound.sampled.AudioFormat;
30  import javax.sound.sampled.AudioSystem;
31  import javax.sound.sampled.UnsupportedAudioFileException;
32  import javax.sound.sampled.AudioFileFormat.Type;
33  
34  import org.apache.tika.exception.TikaException;
35  import org.apache.tika.metadata.Metadata;
36  import org.apache.tika.metadata.XMPDM;
37  import org.apache.tika.mime.MediaType;
38  import org.apache.tika.parser.ParseContext;
39  import org.apache.tika.parser.Parser;
40  import org.apache.tika.sax.XHTMLContentHandler;
41  import org.xml.sax.ContentHandler;
42  import org.xml.sax.SAXException;
43  
44  public class AudioParser implements Parser {
45  
46      private static final Set<MediaType> SUPPORTED_TYPES =
47          Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
48                  MediaType.audio("basic"),
49                  MediaType.audio("x-wav"),
50                  MediaType.audio("x-aiff"))));
51  
52      public Set<MediaType> getSupportedTypes(ParseContext context) {
53          return SUPPORTED_TYPES;
54      }
55  
56      public void parse(
57              InputStream stream, ContentHandler handler,
58              Metadata metadata, ParseContext context)
59              throws IOException, SAXException, TikaException {
60          // AudioSystem expects the stream to support the mark feature
61          if (!stream.markSupported()) {
62              stream = new BufferedInputStream(stream);
63          }
64          try {
65              AudioFileFormat fileFormat = AudioSystem.getAudioFileFormat(stream);
66              Type type = fileFormat.getType();
67              if (type == Type.AIFC || type == Type.AIFF) {
68                  metadata.set(Metadata.CONTENT_TYPE, "audio/x-aiff");
69              } else if (type == Type.AU || type == Type.SND) {
70                  metadata.set(Metadata.CONTENT_TYPE, "audio/basic");
71              } else if (type == Type.WAVE) {
72                  metadata.set(Metadata.CONTENT_TYPE, "audio/x-wav");
73              }
74  
75              AudioFormat audioFormat = fileFormat.getFormat();
76              int channels = audioFormat.getChannels();
77              if (channels != AudioSystem.NOT_SPECIFIED) {
78                  metadata.set("channels", String.valueOf(channels));
79                  // TODO: Use XMPDM.TRACKS? (see also frame rate in AudioFormat)
80              }
81              float rate = audioFormat.getSampleRate();
82              if (rate != AudioSystem.NOT_SPECIFIED) {
83                  metadata.set("samplerate", String.valueOf(rate));
84                  metadata.set(
85                          XMPDM.AUDIO_SAMPLE_RATE,
86                          Integer.toString((int) rate));
87              }
88              int bits = audioFormat.getSampleSizeInBits();
89              if (bits != AudioSystem.NOT_SPECIFIED) {
90                  metadata.set("bits", String.valueOf(bits));
91                  if (bits == 8) {
92                      metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "8Int");
93                  } else if (bits == 16) {
94                      metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "16Int");
95                  } else if (bits == 32) {
96                      metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "32Int");
97                  }
98              }
99              metadata.set("encoding", audioFormat.getEncoding().toString());
100 
101             // Javadoc suggests that some of the following properties might
102             // be available, but I had no success in finding any:
103 
104             // "duration" Long playback duration of the file in microseconds
105             // "author" String name of the author of this file
106             // "title" String title of this file
107             // "copyright" String copyright message
108             // "date" Date date of the recording or release
109             // "comment" String an arbitrary text
110 
111             for (Entry<String, Object> entry : fileFormat.properties().entrySet()) {
112                 metadata.set(entry.getKey(), entry.getValue().toString());
113             }
114             for (Entry<String, Object> entry : audioFormat.properties().entrySet()) {
115                 metadata.set(entry.getKey(), entry.getValue().toString());
116             }
117         } catch (UnsupportedAudioFileException e) {
118             // There is no way to know whether this exception was
119             // caused by the document being corrupted or by the format
120             // just being unsupported. So we do nothing.
121         }
122 
123         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
124         xhtml.startDocument();
125         xhtml.endDocument();
126     }
127 
128     /**
129      * @deprecated This method will be removed in Apache Tika 1.0.
130      */
131     public void parse(
132             InputStream stream, ContentHandler handler, Metadata metadata)
133             throws IOException, SAXException, TikaException {
134         parse(stream, handler, metadata, new ParseContext());
135     }
136 
137 }