1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.audio;
18
19 import java.io.BufferedInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.util.Arrays;
23 import java.util.Collections;
24 import java.util.HashSet;
25 import java.util.Set;
26 import java.util.Map.Entry;
27
28 import javax.sound.sampled.AudioFileFormat;
29 import javax.sound.sampled.AudioFormat;
30 import javax.sound.sampled.AudioSystem;
31 import javax.sound.sampled.UnsupportedAudioFileException;
32 import javax.sound.sampled.AudioFileFormat.Type;
33
34 import org.apache.tika.exception.TikaException;
35 import org.apache.tika.metadata.Metadata;
36 import org.apache.tika.metadata.XMPDM;
37 import org.apache.tika.mime.MediaType;
38 import org.apache.tika.parser.ParseContext;
39 import org.apache.tika.parser.Parser;
40 import org.apache.tika.sax.XHTMLContentHandler;
41 import org.xml.sax.ContentHandler;
42 import org.xml.sax.SAXException;
43
44 public class AudioParser implements Parser {
45
46 private static final Set<MediaType> SUPPORTED_TYPES =
47 Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
48 MediaType.audio("basic"),
49 MediaType.audio("x-wav"),
50 MediaType.audio("x-aiff"))));
51
52 public Set<MediaType> getSupportedTypes(ParseContext context) {
53 return SUPPORTED_TYPES;
54 }
55
56 public void parse(
57 InputStream stream, ContentHandler handler,
58 Metadata metadata, ParseContext context)
59 throws IOException, SAXException, TikaException {
60
61 if (!stream.markSupported()) {
62 stream = new BufferedInputStream(stream);
63 }
64 try {
65 AudioFileFormat fileFormat = AudioSystem.getAudioFileFormat(stream);
66 Type type = fileFormat.getType();
67 if (type == Type.AIFC || type == Type.AIFF) {
68 metadata.set(Metadata.CONTENT_TYPE, "audio/x-aiff");
69 } else if (type == Type.AU || type == Type.SND) {
70 metadata.set(Metadata.CONTENT_TYPE, "audio/basic");
71 } else if (type == Type.WAVE) {
72 metadata.set(Metadata.CONTENT_TYPE, "audio/x-wav");
73 }
74
75 AudioFormat audioFormat = fileFormat.getFormat();
76 int channels = audioFormat.getChannels();
77 if (channels != AudioSystem.NOT_SPECIFIED) {
78 metadata.set("channels", String.valueOf(channels));
79
80 }
81 float rate = audioFormat.getSampleRate();
82 if (rate != AudioSystem.NOT_SPECIFIED) {
83 metadata.set("samplerate", String.valueOf(rate));
84 metadata.set(
85 XMPDM.AUDIO_SAMPLE_RATE,
86 Integer.toString((int) rate));
87 }
88 int bits = audioFormat.getSampleSizeInBits();
89 if (bits != AudioSystem.NOT_SPECIFIED) {
90 metadata.set("bits", String.valueOf(bits));
91 if (bits == 8) {
92 metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "8Int");
93 } else if (bits == 16) {
94 metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "16Int");
95 } else if (bits == 32) {
96 metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "32Int");
97 }
98 }
99 metadata.set("encoding", audioFormat.getEncoding().toString());
100
101
102
103
104
105
106
107
108
109
110
111 for (Entry<String, Object> entry : fileFormat.properties().entrySet()) {
112 metadata.set(entry.getKey(), entry.getValue().toString());
113 }
114 for (Entry<String, Object> entry : audioFormat.properties().entrySet()) {
115 metadata.set(entry.getKey(), entry.getValue().toString());
116 }
117 } catch (UnsupportedAudioFileException e) {
118
119
120
121 }
122
123 XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
124 xhtml.startDocument();
125 xhtml.endDocument();
126 }
127
128
129
130
131 public void parse(
132 InputStream stream, ContentHandler handler, Metadata metadata)
133 throws IOException, SAXException, TikaException {
134 parse(stream, handler, metadata, new ParseContext());
135 }
136
137 }