1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.microsoft;
18
19 import java.io.InputStream;
20 import java.util.Locale;
21
22 import junit.framework.TestCase;
23
24 import org.apache.tika.metadata.Metadata;
25 import org.apache.tika.sax.BodyContentHandler;
26 import org.xml.sax.ContentHandler;
27
28 public class ExcelParserTest extends TestCase {
29
30
31 private Locale defaultLocale;
32
33 protected void setUp() {
34 defaultLocale = Locale.getDefault();
35 Locale.setDefault(Locale.US);
36 }
37
38 protected void tearDown() {
39 Locale.setDefault(defaultLocale);
40 }
41
42 public void testExcelParser() throws Exception {
43 InputStream input = ExcelParserTest.class.getResourceAsStream(
44 "/test-documents/testEXCEL.xls");
45 try {
46 Metadata metadata = new Metadata();
47 ContentHandler handler = new BodyContentHandler();
48 new OfficeParser().parse(input, handler, metadata);
49
50 assertEquals(
51 "application/vnd.ms-excel",
52 metadata.get(Metadata.CONTENT_TYPE));
53 assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
54 assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
55 String content = handler.toString();
56 assertTrue(content.contains("Sample Excel Worksheet"));
57 assertTrue(content.contains("Numbers and their Squares"));
58 assertTrue(content.contains("\t\tNumber\tSquare"));
59 assertTrue(content.contains("9"));
60 assertFalse(content.contains("9.0"));
61 assertTrue(content.contains("196"));
62 assertFalse(content.contains("196.0"));
63 } finally {
64 input.close();
65 }
66 }
67
68 public void testExcelParserFormatting() throws Exception {
69 InputStream input = ExcelParserTest.class.getResourceAsStream(
70 "/test-documents/testEXCEL-formats.xls");
71 try {
72 Metadata metadata = new Metadata();
73 ContentHandler handler = new BodyContentHandler();
74 new OfficeParser().parse(input, handler, metadata);
75
76 assertEquals(
77 "application/vnd.ms-excel",
78 metadata.get(Metadata.CONTENT_TYPE));
79
80 String content = handler.toString();
81
82
83 assertTrue(content.contains("1,599.99"));
84 assertTrue(content.contains("-1,599.99"));
85
86
87 assertTrue(content.contains("$1,599.99"));
88 assertTrue(content.contains("($1,599.99)"));
89
90
91 assertTrue(content.contains("1.98E08"));
92 assertTrue(content.contains("-1.98E08"));
93
94
95 assertTrue(content.contains("2%"));
96 assertTrue(content.contains("2.50%"));
97
98
99 assertTrue(content.contains("6:15"));
100 assertTrue(content.contains("18:15"));
101
102
103 assertTrue(content.contains("17-May-07"));
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126 } finally {
127 input.close();
128 }
129 }
130
131 }