1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.pkg;
18
19 import java.io.InputStream;
20
21 import junit.framework.TestCase;
22
23 import org.apache.tika.metadata.Metadata;
24 import org.apache.tika.parser.AutoDetectParser;
25 import org.apache.tika.parser.Parser;
26 import org.apache.tika.sax.BodyContentHandler;
27 import org.xml.sax.ContentHandler;
28
29
30
31
32 public class ZipParserTest extends TestCase {
33
34 public void testZipParsing() throws Exception {
35 Parser parser = new AutoDetectParser();
36 ContentHandler handler = new BodyContentHandler();
37 Metadata metadata = new Metadata();
38
39 InputStream stream = ZipParserTest.class.getResourceAsStream(
40 "/test-documents/test-documents.zip");
41 try {
42 parser.parse(stream, handler, metadata);
43 } finally {
44 stream.close();
45 }
46
47 assertEquals("application/zip", metadata.get(Metadata.CONTENT_TYPE));
48 String content = handler.toString();
49 assertTrue(content.contains("testEXCEL.xls"));
50 assertTrue(content.contains("Sample Excel Worksheet"));
51 assertTrue(content.contains("testHTML.html"));
52 assertTrue(content.contains("Test Indexation Html"));
53 assertTrue(content.contains("testOpenOffice2.odt"));
54 assertTrue(content.contains("This is a sample Open Office document"));
55 assertTrue(content.contains("testPDF.pdf"));
56 assertTrue(content.contains("Apache Tika"));
57 assertTrue(content.contains("testPPT.ppt"));
58 assertTrue(content.contains("Sample Powerpoint Slide"));
59 assertTrue(content.contains("testRTF.rtf"));
60 assertTrue(content.contains("indexation Word"));
61 assertTrue(content.contains("testTXT.txt"));
62 assertTrue(content.contains("Test d'indexation de Txt"));
63 assertTrue(content.contains("testWORD.doc"));
64 assertTrue(content.contains("This is a sample Microsoft Word Document"));
65 assertTrue(content.contains("testXML.xml"));
66 assertTrue(content.contains("Rida Benjelloun"));
67 }
68
69 }