1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.microsoft.ooxml;
18
19 import org.apache.poi.POIXMLTextExtractor;
20 import org.apache.poi.POIXMLProperties.CoreProperties;
21 import org.apache.poi.POIXMLProperties.ExtendedProperties;
22 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
23 import org.apache.poi.openxml4j.util.Nullable;
24 import org.apache.tika.exception.TikaException;
25 import org.apache.tika.metadata.Metadata;
26 import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
27
28
29
30
31
32
33
34
35 public class MetadataExtractor {
36
37 private final POIXMLTextExtractor extractor;
38
39 private final String type;
40
41 public MetadataExtractor(POIXMLTextExtractor extractor, String type) {
42 this.extractor = extractor;
43 this.type = type;
44 }
45
46 public void extract(Metadata metadata) throws TikaException {
47 addProperty(metadata, Metadata.CONTENT_TYPE, type);
48 extractMetadata(extractor.getCoreProperties(), metadata);
49 extractMetadata(extractor.getExtendedProperties(), metadata);
50 }
51
52 private void extractMetadata(CoreProperties properties, Metadata metadata) {
53 PackagePropertiesPart propsHolder = properties
54 .getUnderlyingProperties();
55
56 addProperty(metadata, Metadata.CATEGORY, propsHolder.getCategoryProperty());
57 addProperty(metadata, Metadata.CONTENT_STATUS, propsHolder
58 .getContentStatusProperty());
59 addProperty(metadata, Metadata.DATE, propsHolder
60 .getCreatedPropertyString());
61 addProperty(metadata, Metadata.CREATOR, propsHolder
62 .getCreatorProperty());
63 addProperty(metadata, Metadata.AUTHOR, propsHolder
64 .getCreatorProperty());
65 addProperty(metadata, Metadata.DESCRIPTION, propsHolder
66 .getDescriptionProperty());
67 addProperty(metadata, Metadata.IDENTIFIER, propsHolder
68 .getIdentifierProperty());
69 addProperty(metadata, Metadata.KEYWORDS, propsHolder
70 .getKeywordsProperty());
71 addProperty(metadata, Metadata.LANGUAGE, propsHolder
72 .getLanguageProperty());
73 addProperty(metadata, Metadata.LAST_AUTHOR, propsHolder
74 .getLastModifiedByProperty());
75 addProperty(metadata, Metadata.LAST_PRINTED, propsHolder
76 .getLastPrintedPropertyString());
77 addProperty(metadata, Metadata.LAST_MODIFIED, propsHolder
78 .getModifiedPropertyString());
79 addProperty(metadata, Metadata.REVISION_NUMBER, propsHolder
80 .getRevisionProperty());
81 addProperty(metadata, Metadata.SUBJECT, propsHolder
82 .getSubjectProperty());
83 addProperty(metadata, Metadata.TITLE, propsHolder.getTitleProperty());
84 addProperty(metadata, Metadata.VERSION, propsHolder.getVersionProperty());
85 }
86
87 private void extractMetadata(ExtendedProperties properties,
88 Metadata metadata) {
89 CTProperties propsHolder = properties.getUnderlyingProperties();
90
91 addProperty(metadata, Metadata.APPLICATION_NAME, propsHolder
92 .getApplication());
93 addProperty(metadata, Metadata.APPLICATION_VERSION, propsHolder
94 .getAppVersion());
95 addProperty(metadata, Metadata.CHARACTER_COUNT, propsHolder
96 .getCharacters());
97 addProperty(metadata, Metadata.CHARACTER_COUNT_WITH_SPACES, propsHolder
98 .getCharactersWithSpaces());
99 addProperty(metadata, Metadata.PUBLISHER, propsHolder.getCompany());
100 addProperty(metadata, Metadata.LINE_COUNT, propsHolder.getLines());
101 addProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
102 addProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
103 addProperty(metadata, Metadata.PAGE_COUNT, propsHolder.getPages());
104 addProperty(metadata, Metadata.PARAGRAPH_COUNT, propsHolder.getParagraphs());
105 addProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder
106 .getPresentationFormat());
107 addProperty(metadata, Metadata.SLIDE_COUNT, propsHolder.getSlides());
108 addProperty(metadata, Metadata.TEMPLATE, propsHolder.getTemplate());
109 addProperty(metadata, Metadata.TOTAL_TIME, propsHolder.getTotalTime());
110 addProperty(metadata, Metadata.WORD_COUNT, propsHolder.getWords());
111 }
112
113 private void addProperty(Metadata metadata, String name, Nullable<?> value) {
114 if (value.getValue() != null) {
115 addProperty(metadata, name, value.getValue().toString());
116 }
117 }
118
119 private void addProperty(Metadata metadata, String name, String value) {
120 if (value != null) {
121 metadata.set(name, value);
122 }
123 }
124
125 private void addProperty(Metadata metadata, String name, long value) {
126 if (value > 0) {
127 metadata.set(name, Long.toString(value));
128 }
129 }
130 }