1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.microsoft;
18
19 import java.io.FileNotFoundException;
20 import java.io.IOException;
21 import java.util.Date;
22
23 import org.apache.poi.hpsf.CustomProperties;
24 import org.apache.poi.hpsf.DocumentSummaryInformation;
25 import org.apache.poi.hpsf.MarkUnsupportedException;
26 import org.apache.poi.hpsf.NoPropertySetStreamException;
27 import org.apache.poi.hpsf.PropertySet;
28 import org.apache.poi.hpsf.SummaryInformation;
29 import org.apache.poi.hpsf.UnexpectedPropertySetTypeException;
30 import org.apache.poi.poifs.filesystem.DocumentEntry;
31 import org.apache.poi.poifs.filesystem.DocumentInputStream;
32 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
33 import org.apache.tika.exception.TikaException;
34 import org.apache.tika.metadata.Metadata;
35
36
37
38
39 class SummaryExtractor {
40
41 private static final String SUMMARY_INFORMATION =
42 SummaryInformation.DEFAULT_STREAM_NAME;
43
44 private static final String DOCUMENT_SUMMARY_INFORMATION =
45 DocumentSummaryInformation.DEFAULT_STREAM_NAME;
46
47 private final Metadata metadata;
48
49 public SummaryExtractor(Metadata metadata) {
50 this.metadata = metadata;
51 }
52
53 public void parseSummaries(POIFSFileSystem filesystem)
54 throws IOException, TikaException {
55 parseSummaryEntryIfExists(filesystem, SUMMARY_INFORMATION);
56 parseSummaryEntryIfExists(filesystem, DOCUMENT_SUMMARY_INFORMATION);
57 }
58
59 private void parseSummaryEntryIfExists(
60 POIFSFileSystem filesystem, String entryName)
61 throws IOException, TikaException {
62 try {
63 DocumentEntry entry =
64 (DocumentEntry) filesystem.getRoot().getEntry(entryName);
65 PropertySet properties =
66 new PropertySet(new DocumentInputStream(entry));
67 if (properties.isSummaryInformation()) {
68 parse(new SummaryInformation(properties));
69 }
70 if (properties.isDocumentSummaryInformation()) {
71 parse(new DocumentSummaryInformation(properties));
72 }
73 } catch (FileNotFoundException e) {
74
75 } catch (NoPropertySetStreamException e) {
76 throw new TikaException("Not a HPSF document", e);
77 } catch (UnexpectedPropertySetTypeException e) {
78 throw new TikaException("Unexpected HPSF document", e);
79 } catch (MarkUnsupportedException e) {
80 throw new TikaException("Invalid DocumentInputStream", e);
81 }
82 }
83
84 private void parse(SummaryInformation summary) {
85 set(Metadata.TITLE, summary.getTitle());
86 set(Metadata.AUTHOR, summary.getAuthor());
87 set(Metadata.KEYWORDS, summary.getKeywords());
88 set(Metadata.SUBJECT, summary.getSubject());
89 set(Metadata.LAST_AUTHOR, summary.getLastAuthor());
90 set(Metadata.COMMENTS, summary.getComments());
91 set(Metadata.TEMPLATE, summary.getTemplate());
92 set(Metadata.APPLICATION_NAME, summary.getApplicationName());
93 set(Metadata.REVISION_NUMBER, summary.getRevNumber());
94 set(Metadata.CREATION_DATE, summary.getCreateDateTime());
95 set(Metadata.CHARACTER_COUNT, summary.getCharCount());
96 set(Metadata.EDIT_TIME, summary.getEditTime());
97 set(Metadata.LAST_SAVED, summary.getLastSaveDateTime());
98 set(Metadata.PAGE_COUNT, summary.getPageCount());
99 set(Metadata.SECURITY, summary.getSecurity());
100 set(Metadata.WORD_COUNT, summary.getWordCount());
101 set(Metadata.LAST_PRINTED, summary.getLastPrinted());
102 }
103
104 private void parse(DocumentSummaryInformation summary) {
105 set(Metadata.COMPANY, summary.getCompany());
106 set(Metadata.MANAGER, summary.getManager());
107 set(Metadata.LANGUAGE, getLanguage(summary));
108 set(Metadata.CATEGORY, summary.getCategory());
109 }
110
111 private String getLanguage(DocumentSummaryInformation summary) {
112 CustomProperties customProperties = summary.getCustomProperties();
113 if (customProperties != null) {
114 Object value = customProperties.get("Language");
115 if (value instanceof String) {
116 return (String) value;
117 }
118 }
119 return null;
120 }
121
122 private void set(String name, String value) {
123 if (value != null) {
124 metadata.set(name, value);
125 }
126 }
127
128 private void set(String name, Date value) {
129 if (value != null) {
130 metadata.set(name, value.toString());
131 }
132 }
133
134 private void set(String name, long value) {
135 if (value > 0) {
136 metadata.set(name, Long.toString(value));
137 }
138 }
139
140 }