1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser;
18
19 import java.io.BufferedInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22
23 import org.apache.tika.config.TikaConfig;
24 import org.apache.tika.detect.Detector;
25 import org.apache.tika.exception.TikaException;
26 import org.apache.tika.io.CountingInputStream;
27 import org.apache.tika.metadata.Metadata;
28 import org.apache.tika.mime.MediaType;
29 import org.apache.tika.sax.SecureContentHandler;
30 import org.xml.sax.ContentHandler;
31 import org.xml.sax.SAXException;
32
33 public class AutoDetectParser extends CompositeParser {
34
35
36
37
38
39 private Detector detector;
40
41
42
43
44
45 public AutoDetectParser() {
46 this(TikaConfig.getDefaultConfig());
47 }
48
49 public AutoDetectParser(TikaConfig config) {
50 setConfig(config);
51 }
52
53 public void setConfig(TikaConfig config) {
54 setParsers(config.getParsers());
55 setDetector(config.getMimeRepository());
56 }
57
58
59
60
61
62
63
64
65 public Detector getDetector() {
66 return detector;
67 }
68
69
70
71
72
73
74
75
76
77
78 public void setDetector(Detector detector) {
79 this.detector = detector;
80 }
81
82 public void parse(
83 InputStream stream, ContentHandler handler,
84 Metadata metadata, ParseContext context)
85 throws IOException, SAXException, TikaException {
86
87 stream = new BufferedInputStream(stream);
88
89
90 MediaType type = detector.detect(stream, metadata);
91 metadata.set(Metadata.CONTENT_TYPE, type.toString());
92
93
94 CountingInputStream count = new CountingInputStream(stream);
95 SecureContentHandler secure = new SecureContentHandler(handler, count);
96
97
98 try {
99 super.parse(count, secure, metadata, context);
100 } catch (SAXException e) {
101
102 secure.throwIfCauseOf(e);
103 throw e;
104 }
105 }
106
107 public void parse(
108 InputStream stream, ContentHandler handler, Metadata metadata)
109 throws IOException, SAXException, TikaException {
110 ParseContext context = new ParseContext();
111 context.set(Parser.class, this);
112 parse(stream, handler, metadata, context);
113 }
114
115 }