1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.microsoft.ooxml;
18
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.HashSet;
24 import java.util.Locale;
25 import java.util.Set;
26
27 import org.apache.tika.exception.TikaException;
28 import org.apache.tika.metadata.Metadata;
29 import org.apache.tika.mime.MediaType;
30 import org.apache.tika.parser.ParseContext;
31 import org.apache.tika.parser.Parser;
32 import org.xml.sax.ContentHandler;
33 import org.xml.sax.SAXException;
34
35
36
37
38
39 public class OOXMLParser implements Parser {
40
41 private static final Set<MediaType> SUPPORTED_TYPES =
42 Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
43 MediaType.application("x-tika-ooxml"),
44 MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation"),
45 MediaType.application("vnd.ms-powerpoint.presentation.macroenabled.12"),
46 MediaType.application("vnd.openxmlformats-officedocument.presentationml.template"),
47 MediaType.application("vnd.openxmlformats-officedocument.presentationml.slideshow"),
48 MediaType.application("vnd.ms-powerpoint.slideshow.macroenabled.12"),
49 MediaType.application("vnd.ms-powerpoint.addin.macroenabled.12"),
50 MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
51 MediaType.application("vnd.ms-excel.sheet.macroenabled.12"),
52 MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.template"),
53 MediaType.application("vnd.ms-excel.template.macroenabled.12"),
54 MediaType.application("vnd.ms-excel.addin.macroenabled.12"),
55 MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document"),
56 MediaType.application("vnd.ms-word.document.macroenabled.12"),
57 MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.template"),
58 MediaType.application("vnd.ms-word.template.macroenabled.12"))));
59
60 public Set<MediaType> getSupportedTypes(ParseContext context) {
61 return SUPPORTED_TYPES;
62 }
63
64 public void parse(
65 InputStream stream, ContentHandler handler,
66 Metadata metadata, ParseContext context)
67 throws IOException, SAXException, TikaException {
68 Locale locale = context.get(Locale.class, Locale.getDefault());
69 OOXMLExtractorFactory.parse(stream, handler, metadata, locale);
70 }
71
72
73
74
75 public void parse(
76 InputStream stream, ContentHandler handler, Metadata metadata)
77 throws IOException, SAXException, TikaException {
78 parse(stream, handler, metadata, new ParseContext());
79 }
80
81 }