1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.gui;
18
19 import java.awt.Dimension;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.PrintWriter;
23 import java.io.StringWriter;
24 import java.io.Writer;
25 import java.util.Arrays;
26
27 import javax.swing.JEditorPane;
28 import javax.swing.JFrame;
29 import javax.swing.JOptionPane;
30 import javax.swing.JScrollPane;
31 import javax.swing.JTabbedPane;
32 import javax.swing.ProgressMonitorInputStream;
33 import javax.swing.SwingUtilities;
34 import javax.swing.UIManager;
35 import javax.xml.transform.OutputKeys;
36 import javax.xml.transform.TransformerConfigurationException;
37 import javax.xml.transform.sax.SAXTransformerFactory;
38 import javax.xml.transform.sax.TransformerHandler;
39 import javax.xml.transform.stream.StreamResult;
40
41 import org.apache.tika.metadata.Metadata;
42 import org.apache.tika.parser.AutoDetectParser;
43 import org.apache.tika.parser.ParseContext;
44 import org.apache.tika.parser.Parser;
45 import org.apache.tika.sax.BodyContentHandler;
46 import org.apache.tika.sax.ContentHandlerDecorator;
47 import org.apache.tika.sax.TeeContentHandler;
48 import org.apache.tika.sax.XHTMLContentHandler;
49 import org.xml.sax.Attributes;
50 import org.xml.sax.ContentHandler;
51 import org.xml.sax.SAXException;
52
53
54
55
56
57 public class TikaGUI extends JFrame {
58
59
60
61
62 private static final long serialVersionUID = 5883906936187059495L;
63
64
65
66
67
68
69
70
71
72 public static void main(String[] args) throws Exception {
73 UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
74 SwingUtilities.invokeLater(new Runnable() {
75 public void run() {
76 new TikaGUI(new AutoDetectParser()).setVisible(true);
77 }
78 });
79 }
80
81
82
83
84 private final ParseContext context;
85
86
87
88
89 private final Parser parser;
90
91
92
93
94 private final JTabbedPane tabs;
95
96
97
98
99 private final JEditorPane html;
100
101
102
103
104 private final JEditorPane text;
105
106
107
108
109 private final JEditorPane xml;
110
111
112
113
114 private final JEditorPane metadata;
115
116
117
118
119 private final JEditorPane errors;
120
121 public TikaGUI(Parser parser) {
122 super("Apache Tika");
123 setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
124
125 tabs = new JTabbedPane();
126 add(tabs);
127
128 html = createEditor("Formatted text", "text/html");
129 text = createEditor("Plain text", "text/plain");
130 xml = createEditor("Structured text", "text/plain");
131 metadata = createEditor("Metadata", "text/plain");
132 errors = createEditor("Errors", "text/plain");
133
134 setPreferredSize(new Dimension(500, 400));
135 pack();
136
137 this.context = new ParseContext();
138 this.parser = parser;
139 this.context.set(Parser.class, parser);
140 }
141
142 public void importStream(InputStream input, Metadata md)
143 throws IOException {
144 try {
145 StringWriter htmlBuffer = new StringWriter();
146 StringWriter textBuffer = new StringWriter();
147 StringWriter xmlBuffer = new StringWriter();
148 StringBuilder metadataBuffer = new StringBuilder();
149
150 ContentHandler handler = new TeeContentHandler(
151 getHtmlHandler(htmlBuffer),
152 getTextContentHandler(textBuffer),
153 getXmlContentHandler(xmlBuffer));
154
155 input = new ProgressMonitorInputStream(
156 this, "Parsing stream", input);
157 parser.parse(input, handler, md, context);
158
159 String[] names = md.names();
160 Arrays.sort(names);
161 for (String name : names) {
162 metadataBuffer.append(name);
163 metadataBuffer.append(": ");
164 metadataBuffer.append(md.get(name));
165 metadataBuffer.append("\n");
166 }
167
168 setText(errors, "");
169 setText(metadata, metadataBuffer.toString());
170 setText(xml, xmlBuffer.toString());
171 setText(text, textBuffer.toString());
172 setText(html, htmlBuffer.toString());
173 tabs.setSelectedIndex(0);
174 } catch (Exception e) {
175 StringWriter writer = new StringWriter();
176 e.printStackTrace(new PrintWriter(writer));
177 setText(errors, writer.toString());
178 setText(metadata, "");
179 setText(xml, "");
180 setText(text, "");
181 setText(html, "");
182 tabs.setSelectedIndex(tabs.getTabCount() - 1);
183 JOptionPane.showMessageDialog(
184 this,
185 "Apache Tika was unable to parse the file or url.\n "
186 + " See the errors tab for"
187 + " the detailed stack trace of this error.",
188 "Parse error",
189 JOptionPane.ERROR_MESSAGE);
190 } finally {
191 input.close();
192 }
193 }
194
195 private JEditorPane createEditor(String title, String type) {
196 JEditorPane editor = new JEditorPane();
197 editor.setContentType(type);
198 editor.setTransferHandler(new ParsingTransferHandler(
199 editor.getTransferHandler(), this));
200 tabs.add(title, new JScrollPane(editor));
201 return editor;
202 }
203
204 private void setText(JEditorPane editor, String text) {
205 editor.setText(text);
206 editor.setCaretPosition(0);
207 }
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227 private ContentHandler getHtmlHandler(Writer writer)
228 throws TransformerConfigurationException {
229 SAXTransformerFactory factory = (SAXTransformerFactory)
230 SAXTransformerFactory.newInstance();
231 TransformerHandler handler = factory.newTransformerHandler();
232 handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
233 handler.setResult(new StreamResult(writer));
234 return new ContentHandlerDecorator(handler) {
235 @Override
236 public void startElement(
237 String uri, String localName, String name, Attributes atts)
238 throws SAXException {
239 if (XHTMLContentHandler.XHTML.equals(uri)) {
240 uri = null;
241 }
242 if (!"head".equals(localName)) {
243 super.startElement(uri, localName, name, atts);
244 }
245 }
246 @Override
247 public void endElement(String uri, String localName, String name)
248 throws SAXException {
249 if (XHTMLContentHandler.XHTML.equals(uri)) {
250 uri = null;
251 }
252 if (!"head".equals(localName)) {
253 super.endElement(uri, localName, name);
254 }
255 }
256 @Override
257 public void startPrefixMapping(String prefix, String uri) {
258 }
259 @Override
260 public void endPrefixMapping(String prefix) {
261 }
262 };
263 }
264
265 private ContentHandler getTextContentHandler(Writer writer) {
266 return new BodyContentHandler(writer);
267 }
268
269 private ContentHandler getXmlContentHandler(Writer writer)
270 throws TransformerConfigurationException {
271 SAXTransformerFactory factory = (SAXTransformerFactory)
272 SAXTransformerFactory.newInstance();
273 TransformerHandler handler = factory.newTransformerHandler();
274 handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
275 handler.setResult(new StreamResult(writer));
276 return handler;
277 }
278
279 }