1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.pkg;
18
19 import java.io.InputStream;
20
21 import junit.framework.TestCase;
22
23 import org.apache.tika.metadata.Metadata;
24 import org.apache.tika.parser.AutoDetectParser;
25 import org.apache.tika.parser.Parser;
26 import org.apache.tika.sax.BodyContentHandler;
27 import org.xml.sax.ContentHandler;
28
29
30
31
32 public class GzipParserTest extends TestCase {
33
34 public void testGzipParsing() throws Exception {
35 Parser parser = new AutoDetectParser();
36 ContentHandler handler = new BodyContentHandler();
37 Metadata metadata = new Metadata();
38
39 InputStream stream = GzipParserTest.class.getResourceAsStream(
40 "/test-documents/test-documents.tgz");
41 try {
42 parser.parse(stream, handler, metadata);
43 } finally {
44 stream.close();
45 }
46
47 assertEquals("application/x-gzip", metadata.get(Metadata.CONTENT_TYPE));
48 String content = handler.toString();
49 assertTrue(content.contains("test-documents/testEXCEL.xls"));
50 assertTrue(content.contains("Sample Excel Worksheet"));
51 assertTrue(content.contains("test-documents/testHTML.html"));
52 assertTrue(content.contains("Test Indexation Html"));
53 assertTrue(content.contains("test-documents/testOpenOffice2.odt"));
54 assertTrue(content.contains("This is a sample Open Office document"));
55 assertTrue(content.contains("test-documents/testPDF.pdf"));
56 assertTrue(content.contains("Apache Tika"));
57 assertTrue(content.contains("test-documents/testPPT.ppt"));
58 assertTrue(content.contains("Sample Powerpoint Slide"));
59 assertTrue(content.contains("test-documents/testRTF.rtf"));
60 assertTrue(content.contains("indexation Word"));
61 assertTrue(content.contains("test-documents/testTXT.txt"));
62 assertTrue(content.contains("Test d'indexation de Txt"));
63 assertTrue(content.contains("test-documents/testWORD.doc"));
64 assertTrue(content.contains("This is a sample Microsoft Word Document"));
65 assertTrue(content.contains("test-documents/testXML.xml"));
66 assertTrue(content.contains("Rida Benjelloun"));
67 }
68
69 public void testSvgzParsing() throws Exception {
70 Parser parser = new AutoDetectParser();
71 ContentHandler handler = new BodyContentHandler();
72 Metadata metadata = new Metadata();
73
74 InputStream stream = GzipParserTest.class.getResourceAsStream(
75 "/test-documents/testSVG.svgz");
76 try {
77 parser.parse(stream, handler, metadata);
78 } finally {
79 stream.close();
80 }
81
82 assertEquals("application/x-gzip", metadata.get(Metadata.CONTENT_TYPE));
83 String content = handler.toString();
84 assertTrue(content.contains("Test SVG image"));
85 }
86
87 }