1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.mbox;
18
19 import static org.mockito.Matchers.any;
20 import static org.mockito.Matchers.eq;
21 import static org.mockito.Mockito.mock;
22 import static org.mockito.Mockito.verify;
23 import static org.mockito.Mockito.times;
24
25 import java.io.InputStream;
26 import java.util.HashMap;
27 import java.util.Map;
28
29 import junit.framework.TestCase;
30
31 import org.apache.tika.metadata.Metadata;
32 import org.apache.tika.parser.ParseContext;
33 import org.apache.tika.parser.Parser;
34 import org.apache.tika.sax.XHTMLContentHandler;
35
36 import org.xml.sax.Attributes;
37 import org.xml.sax.ContentHandler;
38 import org.xml.sax.helpers.DefaultHandler;
39
40 public class MboxParserTest extends TestCase {
41
42 public void testSimple() {
43 Parser parser = new MboxParser();
44 Metadata metadata = new Metadata();
45 InputStream stream = getStream("test-documents/simple.mbox");
46 ContentHandler handler = mock(DefaultHandler.class);
47
48 try {
49 parser.parse(stream, handler, metadata, new ParseContext());
50 verify(handler).startDocument();
51 verify(handler, times(2)).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
52 verify(handler, times(2)).endElement(XHTMLContentHandler.XHTML, "p", "p");
53 verify(handler).characters(new String("Test content 1").toCharArray(), 0, 14);
54 verify(handler).characters(new String("Test content 2").toCharArray(), 0, 14);
55 verify(handler).endDocument();
56 } catch (Exception e) {
57 fail("Exception thrown: " + e.getMessage());
58 }
59 }
60
61 public void testHeaders() {
62 Parser parser = new MboxParser();
63 Metadata metadata = new Metadata();
64 InputStream stream = getStream("test-documents/headers.mbox");
65 ContentHandler handler = mock(DefaultHandler.class);
66
67 try {
68 parser.parse(stream, handler, metadata, new ParseContext());
69
70 verify(handler).startDocument();
71 verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
72 verify(handler).characters(new String("Test content").toCharArray(), 0, 12);
73 verify(handler).endDocument();
74
75 assertEquals("subject", metadata.get(Metadata.TITLE));
76 assertEquals("subject", metadata.get(Metadata.SUBJECT));
77 assertEquals("<author@domain.com>", metadata.get(Metadata.AUTHOR));
78 assertEquals("<author@domain.com>", metadata.get(Metadata.CREATOR));
79 assertEquals("<name@domain.com>", metadata.get("MboxParser-return-path"));
80 assertEquals("Tue, 9 Jun 2009 23:58:45 -0400", metadata.get(Metadata.DATE));
81 } catch (Exception e) {
82 fail("Exception thrown: " + e.getMessage());
83 }
84 }
85
86 public void testMultilineHeader() {
87 Parser parser = new MboxParser();
88 Metadata metadata = new Metadata();
89 InputStream stream = getStream("test-documents/multiline.mbox");
90 ContentHandler handler = mock(DefaultHandler.class);
91
92 try {
93 parser.parse(stream, handler, metadata, new ParseContext());
94
95 verify(handler).startDocument();
96 verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
97 verify(handler).characters(new String("Test content").toCharArray(), 0, 12);
98 verify(handler).endDocument();
99
100 assertEquals("from xxx by xxx with xxx; date", metadata.get("MboxParser-received"));
101 } catch (Exception e) {
102 fail("Exception thrown: " + e.getMessage());
103 }
104 }
105
106 public void testQuoted() {
107 Parser parser = new MboxParser();
108 Metadata metadata = new Metadata();
109 InputStream stream = getStream("test-documents/quoted.mbox");
110 ContentHandler handler = mock(DefaultHandler.class);
111
112 try {
113 parser.parse(stream, handler, metadata, new ParseContext());
114
115 verify(handler).startDocument();
116 verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
117 verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("q"), eq("q"), any(Attributes.class));
118 verify(handler).endElement(eq(XHTMLContentHandler.XHTML), eq("q"), eq("q"));
119 verify(handler).endElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"));
120 verify(handler).characters(new String("Test content").toCharArray(), 0, 12);
121 verify(handler).characters(new String("> quoted stuff").toCharArray(), 0, 14);
122 verify(handler).endDocument();
123 } catch (Exception e) {
124 fail("Exception thrown: " + e.getMessage());
125 }
126 }
127
128 public void testComplex() {
129 Parser parser = new MboxParser();
130 Metadata metadata = new Metadata();
131 InputStream stream = getStream("test-documents/complex.mbox");
132 ContentHandler handler = mock(DefaultHandler.class);
133
134 try {
135 parser.parse(stream, handler, metadata, new ParseContext());
136
137 verify(handler).startDocument();
138 verify(handler, times(3)).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
139 verify(handler, times(3)).endElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"));
140 verify(handler, times(3)).startElement(eq(XHTMLContentHandler.XHTML), eq("q"), eq("q"), any(Attributes.class));
141 verify(handler, times(3)).endElement(eq(XHTMLContentHandler.XHTML), eq("q"), eq("q"));
142 verify(handler).endDocument();
143 } catch (Exception e) {
144 fail("Exception thrown: " + e.getMessage());
145 }
146 }
147
148 private static InputStream getStream(String name) {
149 return Thread.currentThread().getContextClassLoader()
150 .getResourceAsStream(name);
151 }
152
153
154 }