1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.odf;
18
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.HashSet;
24 import java.util.Set;
25 import java.util.zip.ZipEntry;
26 import java.util.zip.ZipInputStream;
27
28 import org.apache.tika.exception.TikaException;
29 import org.apache.tika.io.IOUtils;
30 import org.apache.tika.metadata.Metadata;
31 import org.apache.tika.mime.MediaType;
32 import org.apache.tika.parser.ParseContext;
33 import org.apache.tika.parser.Parser;
34 import org.xml.sax.ContentHandler;
35 import org.xml.sax.SAXException;
36 import org.xml.sax.helpers.DefaultHandler;
37
38
39
40
41 public class OpenDocumentParser implements Parser {
42
43 private static final Set<MediaType> SUPPORTED_TYPES =
44 Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
45 MediaType.application("vnd.sun.xml.writer"),
46 MediaType.application("vnd.oasis.opendocument.text"),
47 MediaType.application("vnd.oasis.opendocument.graphics"),
48 MediaType.application("vnd.oasis.opendocument.presentation"),
49 MediaType.application("vnd.oasis.opendocument.spreadsheet"),
50 MediaType.application("vnd.oasis.opendocument.chart"),
51 MediaType.application("vnd.oasis.opendocument.image"),
52 MediaType.application("vnd.oasis.opendocument.formula"),
53 MediaType.application("vnd.oasis.opendocument.text-master"),
54 MediaType.application("vnd.oasis.opendocument.text-web"),
55 MediaType.application("vnd.oasis.opendocument.text-template"),
56 MediaType.application("vnd.oasis.opendocument.graphics-template"),
57 MediaType.application("vnd.oasis.opendocument.presentation-template"),
58 MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
59 MediaType.application("vnd.oasis.opendocument.chart-template"),
60 MediaType.application("vnd.oasis.opendocument.image-template"),
61 MediaType.application("vnd.oasis.opendocument.formula-template"),
62 MediaType.application("x-vnd.oasis.opendocument.text"),
63 MediaType.application("x-vnd.oasis.opendocument.graphics"),
64 MediaType.application("x-vnd.oasis.opendocument.presentation"),
65 MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
66 MediaType.application("x-vnd.oasis.opendocument.chart"),
67 MediaType.application("x-vnd.oasis.opendocument.image"),
68 MediaType.application("x-vnd.oasis.opendocument.formula"),
69 MediaType.application("x-vnd.oasis.opendocument.text-master"),
70 MediaType.application("x-vnd.oasis.opendocument.text-web"),
71 MediaType.application("x-vnd.oasis.opendocument.text-template"),
72 MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
73 MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
74 MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
75 MediaType.application("x-vnd.oasis.opendocument.chart-template"),
76 MediaType.application("x-vnd.oasis.opendocument.image-template"),
77 MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
78
79 private Parser meta = new OpenDocumentMetaParser();
80
81 private Parser content = new OpenDocumentContentParser();
82
83 public Parser getMetaParser() {
84 return meta;
85 }
86
87 public void setMetaParser(Parser meta) {
88 this.meta = meta;
89 }
90
91 public Parser getContentParser() {
92 return content;
93 }
94
95 public void setContentParser(Parser content) {
96 this.content = content;
97 }
98
99 public Set<MediaType> getSupportedTypes(ParseContext context) {
100 return SUPPORTED_TYPES;
101 }
102
103 public void parse(
104 InputStream stream, ContentHandler handler,
105 Metadata metadata, ParseContext context)
106 throws IOException, SAXException, TikaException {
107 ZipInputStream zip = new ZipInputStream(stream);
108 ZipEntry entry = zip.getNextEntry();
109 while (entry != null) {
110 if (entry.getName().equals("mimetype")) {
111 String type = IOUtils.toString(zip, "UTF-8");
112 metadata.set(Metadata.CONTENT_TYPE, type);
113 } else if (entry.getName().equals("meta.xml")) {
114 meta.parse(zip, new DefaultHandler(), metadata, context);
115 } else if (entry.getName().endsWith("content.xml")) {
116 content.parse(zip, handler, metadata, context);
117 }
118 entry = zip.getNextEntry();
119 }
120 }
121
122
123
124
125 public void parse(
126 InputStream stream, ContentHandler handler, Metadata metadata)
127 throws IOException, SAXException, TikaException {
128 parse(stream, handler, metadata, new ParseContext());
129 }
130
131 }