1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.epub;
18
19 import java.io.InputStream;
20
21 import junit.framework.TestCase;
22
23 import org.apache.tika.metadata.Metadata;
24 import org.apache.tika.sax.BodyContentHandler;
25 import org.xml.sax.ContentHandler;
26
27 public class EpubParserTest extends TestCase {
28
29 public void testXMLParser() throws Exception {
30 InputStream input = EpubParserTest.class.getResourceAsStream(
31 "/test-documents/testEPUB.epub");
32 try {
33 Metadata metadata = new Metadata();
34 ContentHandler handler = new BodyContentHandler();
35 new EpubParser().parse(input, handler, metadata);
36
37 assertEquals("application/epub+zip",
38 metadata.get(Metadata.CONTENT_TYPE));
39 assertEquals("en",
40 metadata.get(Metadata.LANGUAGE));
41 assertEquals("This is an ePub test publication for Tika.",
42 metadata.get(Metadata.DESCRIPTION));
43 assertEquals("Apache",
44 metadata.get(Metadata.PUBLISHER));
45
46 String content = handler.toString();
47 assertTrue(content.contains("Plus a simple div"));
48 assertTrue(content.contains("First item"));
49 assertTrue(content.contains("The previous headings were subchapters"));
50 assertTrue(content.contains("Table data"));
51 } finally {
52 input.close();
53 }
54 }
55
56 }