1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.xml;
18
19 import java.io.InputStream;
20
21 import junit.framework.TestCase;
22
23 import org.apache.tika.metadata.Metadata;
24 import org.apache.tika.sax.BodyContentHandler;
25 import org.xml.sax.ContentHandler;
26 import org.xml.sax.helpers.DefaultHandler;
27
28 public class DcXMLParserTest extends TestCase {
29
30 public void testXMLParserAsciiChars() throws Exception {
31 InputStream input = DcXMLParserTest.class.getResourceAsStream(
32 "/test-documents/testXML.xml");
33 try {
34 Metadata metadata = new Metadata();
35 ContentHandler handler = new BodyContentHandler();
36 new DcXMLParser().parse(input, handler, metadata);
37
38 assertEquals(
39 "application/xml",
40 metadata.get(Metadata.CONTENT_TYPE));
41 assertEquals("Tika test document", metadata.get(Metadata.TITLE));
42 assertEquals("Rida Benjelloun", metadata.get(Metadata.CREATOR));
43 assertEquals(
44 "Java, XML, XSLT, JDOM, Indexation",
45 metadata.get(Metadata.SUBJECT));
46 assertEquals(
47 "Framework d\'indexation des documents XML, HTML, PDF etc.. ",
48 metadata.get(Metadata.DESCRIPTION));
49 assertEquals(
50 "http://www.apache.org",
51 metadata.get(Metadata.IDENTIFIER));
52 assertEquals("test", metadata.get(Metadata.TYPE));
53 assertEquals("application/msword", metadata.get(Metadata.FORMAT));
54 assertEquals("Fr", metadata.get(Metadata.LANGUAGE));
55 assertTrue(metadata.get(Metadata.RIGHTS).contains("testing chars"));
56
57 String content = handler.toString();
58 assertTrue(content.contains("Tika test document"));
59 } finally {
60 input.close();
61 }
62 }
63
64 public void testXMLParserNonAsciiChars() throws Exception {
65 InputStream input = DcXMLParserTest.class.getResourceAsStream("/test-documents/testXML.xml");
66 try {
67 Metadata metadata = new Metadata();
68 new DcXMLParser().parse(input, new DefaultHandler(), metadata);
69
70 final String expected = "Archim\u00E8de et Lius \u00E0 Ch\u00E2teauneuf testing chars en \u00E9t\u00E9";
71 assertEquals(expected,metadata.get(Metadata.RIGHTS));
72 } finally {
73 input.close();
74 }
75 }
76
77 }