1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser;
18
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.util.Collections;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.Map;
25 import java.util.Set;
26
27 import org.apache.tika.exception.TikaException;
28 import org.apache.tika.io.TaggedInputStream;
29 import org.apache.tika.metadata.Metadata;
30 import org.apache.tika.mime.MediaType;
31 import org.apache.tika.sax.TaggedContentHandler;
32 import org.xml.sax.ContentHandler;
33 import org.xml.sax.SAXException;
34
35
36
37
38
39
40
41 public class CompositeParser implements Parser {
42
43
44
45
46 private Map<String, Parser> parsers = new HashMap<String, Parser>();
47
48
49
50
51 private Parser fallback = new EmptyParser();
52
53
54
55
56
57
58 public Map<String, Parser> getParsers() {
59 return parsers;
60 }
61
62
63
64
65
66
67 public void setParsers(Map<String, Parser> parsers) {
68 this.parsers = parsers;
69 }
70
71
72
73
74
75
76 public Parser getFallback() {
77 return fallback;
78 }
79
80
81
82
83
84
85 public void setFallback(Parser fallback) {
86 this.fallback = fallback;
87 }
88
89
90
91
92
93
94
95
96
97
98
99
100 protected Parser getParser(Metadata metadata) {
101 Parser parser = parsers.get(metadata.get(Metadata.CONTENT_TYPE));
102 if (parser == null) {
103 parser = fallback;
104 }
105 return parser;
106 }
107
108 public Set<MediaType> getSupportedTypes(ParseContext context) {
109 Set<MediaType> supportedTypes = new HashSet<MediaType>();
110 for (String type : parsers.keySet()) {
111 supportedTypes.add(MediaType.parse(type));
112 }
113 return Collections.unmodifiableSet(supportedTypes);
114 }
115
116
117
118
119
120
121
122
123
124 public void parse(
125 InputStream stream, ContentHandler handler,
126 Metadata metadata, ParseContext context)
127 throws IOException, SAXException, TikaException {
128 Parser parser = getParser(metadata);
129 TaggedInputStream taggedStream = new TaggedInputStream(stream);
130 TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
131 try {
132 parser.parse(taggedStream, taggedHandler, metadata, context);
133 } catch (RuntimeException e) {
134 throw new TikaException(
135 "Unexpected RuntimeException from " + parser, e);
136 } catch (IOException e) {
137 taggedStream.throwIfCauseOf(e);
138 throw new TikaException(
139 "TIKA-198: Illegal IOException from " + parser, e);
140 } catch (SAXException e) {
141 taggedHandler.throwIfCauseOf(e);
142 throw new TikaException(
143 "TIKA-237: Illegal SAXException from " + parser, e);
144 }
145 }
146
147
148
149
150 public void parse(
151 InputStream stream, ContentHandler handler, Metadata metadata)
152 throws IOException, SAXException, TikaException {
153 parse(stream, handler, metadata, new ParseContext());
154 }
155
156 }