View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.microsoft.ooxml;
18  
19  import org.apache.poi.POIXMLTextExtractor;
20  import org.apache.poi.POIXMLProperties.CoreProperties;
21  import org.apache.poi.POIXMLProperties.ExtendedProperties;
22  import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
23  import org.apache.poi.openxml4j.util.Nullable;
24  import org.apache.tika.exception.TikaException;
25  import org.apache.tika.metadata.Metadata;
26  import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
27  
28  /**
29   * OOXML metadata extractor.
30   * 
31   * Currently POI doesn't support metadata extraction for OOXML.
32   * 
33   * @see OOXMLExtractor#getMetadataExtractor()
34   */
35  public class MetadataExtractor {
36  
37      private final POIXMLTextExtractor extractor;
38  
39      private final String type;
40  
41      public MetadataExtractor(POIXMLTextExtractor extractor, String type) {
42          this.extractor = extractor;
43          this.type = type;
44      }
45  
46      public void extract(Metadata metadata) throws TikaException {
47          addProperty(metadata, Metadata.CONTENT_TYPE, type);
48          extractMetadata(extractor.getCoreProperties(), metadata);
49          extractMetadata(extractor.getExtendedProperties(), metadata);
50      }
51  
52      private void extractMetadata(CoreProperties properties, Metadata metadata) {
53          PackagePropertiesPart propsHolder = properties
54                  .getUnderlyingProperties();
55  
56          addProperty(metadata, Metadata.CATEGORY, propsHolder.getCategoryProperty());
57          addProperty(metadata, Metadata.CONTENT_STATUS, propsHolder
58                  .getContentStatusProperty());
59          addProperty(metadata, Metadata.DATE, propsHolder
60                  .getCreatedPropertyString());
61          addProperty(metadata, Metadata.CREATOR, propsHolder
62                  .getCreatorProperty());
63          addProperty(metadata, Metadata.AUTHOR, propsHolder
64                  .getCreatorProperty());
65          addProperty(metadata, Metadata.DESCRIPTION, propsHolder
66                  .getDescriptionProperty());
67          addProperty(metadata, Metadata.IDENTIFIER, propsHolder
68                  .getIdentifierProperty());
69          addProperty(metadata, Metadata.KEYWORDS, propsHolder
70                  .getKeywordsProperty());
71          addProperty(metadata, Metadata.LANGUAGE, propsHolder
72                  .getLanguageProperty());
73          addProperty(metadata, Metadata.LAST_AUTHOR, propsHolder
74                  .getLastModifiedByProperty());
75          addProperty(metadata, Metadata.LAST_PRINTED, propsHolder
76                  .getLastPrintedPropertyString());
77          addProperty(metadata, Metadata.LAST_MODIFIED, propsHolder
78                  .getModifiedPropertyString());
79          addProperty(metadata, Metadata.REVISION_NUMBER, propsHolder
80                  .getRevisionProperty());
81          addProperty(metadata, Metadata.SUBJECT, propsHolder
82                  .getSubjectProperty());
83          addProperty(metadata, Metadata.TITLE, propsHolder.getTitleProperty());
84          addProperty(metadata, Metadata.VERSION, propsHolder.getVersionProperty());
85      }
86  
87      private void extractMetadata(ExtendedProperties properties,
88              Metadata metadata) {
89          CTProperties propsHolder = properties.getUnderlyingProperties();
90  
91          addProperty(metadata, Metadata.APPLICATION_NAME, propsHolder
92                  .getApplication());
93          addProperty(metadata, Metadata.APPLICATION_VERSION, propsHolder
94                  .getAppVersion());
95          addProperty(metadata, Metadata.CHARACTER_COUNT, propsHolder
96                  .getCharacters());
97          addProperty(metadata, Metadata.CHARACTER_COUNT_WITH_SPACES, propsHolder
98                  .getCharactersWithSpaces());
99          addProperty(metadata, Metadata.PUBLISHER, propsHolder.getCompany());
100         addProperty(metadata, Metadata.LINE_COUNT, propsHolder.getLines());
101         addProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
102         addProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
103         addProperty(metadata, Metadata.PAGE_COUNT, propsHolder.getPages());
104         addProperty(metadata, Metadata.PARAGRAPH_COUNT, propsHolder.getParagraphs());
105         addProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder
106                 .getPresentationFormat());
107         addProperty(metadata, Metadata.SLIDE_COUNT, propsHolder.getSlides());
108         addProperty(metadata, Metadata.TEMPLATE, propsHolder.getTemplate());
109         addProperty(metadata, Metadata.TOTAL_TIME, propsHolder.getTotalTime());
110         addProperty(metadata, Metadata.WORD_COUNT, propsHolder.getWords());
111     }
112 
113     private void addProperty(Metadata metadata, String name, Nullable<?> value) {
114         if (value.getValue() != null) {
115             addProperty(metadata, name, value.getValue().toString());
116         }
117     }
118 
119     private void addProperty(Metadata metadata, String name, String value) {
120         if (value != null) {
121             metadata.set(name, value);
122         }
123     }
124 
125     private void addProperty(Metadata metadata, String name, long value) {
126         if (value > 0) {
127             metadata.set(name, Long.toString(value));
128         }
129     }
130 }