1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.microsoft;
18  
19  import java.io.InputStream;
20  import java.util.Locale;
21  
22  import junit.framework.TestCase;
23  
24  import org.apache.tika.metadata.Metadata;
25  import org.apache.tika.sax.BodyContentHandler;
26  import org.xml.sax.ContentHandler;
27  
28  public class ExcelParserTest extends TestCase {
29  
30      // TODO: This is a workaround until TIKA-371 is fixed
31      private Locale defaultLocale;
32  
33      protected void setUp() {
34          defaultLocale = Locale.getDefault();
35          Locale.setDefault(Locale.US);
36      }
37  
38      protected void tearDown() {
39          Locale.setDefault(defaultLocale);
40      }
41  
42      public void testExcelParser() throws Exception {
43          InputStream input = ExcelParserTest.class.getResourceAsStream(
44                  "/test-documents/testEXCEL.xls");
45          try {
46              Metadata metadata = new Metadata();
47              ContentHandler handler = new BodyContentHandler();
48              new OfficeParser().parse(input, handler, metadata);
49  
50              assertEquals(
51                      "application/vnd.ms-excel",
52                      metadata.get(Metadata.CONTENT_TYPE));
53              assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
54              assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
55              String content = handler.toString();
56              assertTrue(content.contains("Sample Excel Worksheet"));
57              assertTrue(content.contains("Numbers and their Squares"));
58              assertTrue(content.contains("\t\tNumber\tSquare"));
59              assertTrue(content.contains("9"));
60              assertFalse(content.contains("9.0"));
61              assertTrue(content.contains("196"));
62              assertFalse(content.contains("196.0"));
63          } finally {
64              input.close();
65          }
66      }
67  
68      public void testExcelParserFormatting() throws Exception {
69          InputStream input = ExcelParserTest.class.getResourceAsStream(
70                  "/test-documents/testEXCEL-formats.xls");
71          try {
72              Metadata metadata = new Metadata();
73              ContentHandler handler = new BodyContentHandler();
74              new OfficeParser().parse(input, handler, metadata);
75  
76              assertEquals(
77                      "application/vnd.ms-excel",
78                      metadata.get(Metadata.CONTENT_TYPE));
79  
80              String content = handler.toString();
81  
82              // Number #,##0.00
83              assertTrue(content.contains("1,599.99"));
84              assertTrue(content.contains("-1,599.99"));
85  
86              // Currency $#,##0.00;[Red]($#,##0.00)
87              assertTrue(content.contains("$1,599.99"));
88              assertTrue(content.contains("($1,599.99)"));
89  
90              // Scientific 0.00E+00
91              assertTrue(content.contains("1.98E08"));
92              assertTrue(content.contains("-1.98E08"));
93  
94              // Percentage
95              assertTrue(content.contains("2%"));
96              assertTrue(content.contains("2.50%"));
97  
98              // Time Format: h:mm
99              assertTrue(content.contains("6:15"));
100             assertTrue(content.contains("18:15"));
101 
102             // Date Format: d-mmm-yy
103             assertTrue(content.contains("17-May-07"));
104 
105             // Below assertions represent outstanding formatting issues to be addressed
106             // they are included to allow the issues to be progressed with the Apache POI
107             // team - See TIKA-103.
108 
109             /*************************************************************************
110             // Date Format: m/d/yy
111             assertTrue(content.contains("03/10/2009"));
112 
113             // Date/Time Format
114             assertTrue(content.contains("19/01/2008 04:35"));
115 
116             // Custom Number (0 "dollars and" .00 "cents")
117             assertTrue(content.contains("19 dollars and .99 cents"));
118 
119             // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
120             assertTrue(content.contains("At 4:20 AM on Thursday May 17, 2007"));
121 
122             // Fraction (2.5): # ?/?
123             assertTrue(content.contains("2 1 / 2"));
124             **************************************************************************/
125 
126         } finally {
127             input.close();
128         }
129     }
130 
131 }