1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika.parser.mbox;
18  
19  import static org.mockito.Matchers.any;
20  import static org.mockito.Matchers.eq;
21  import static org.mockito.Mockito.mock;
22  import static org.mockito.Mockito.verify;
23  import static org.mockito.Mockito.times;
24  
25  import java.io.InputStream;
26  import java.util.HashMap;
27  import java.util.Map;
28  
29  import junit.framework.TestCase;
30  
31  import org.apache.tika.metadata.Metadata;
32  import org.apache.tika.parser.ParseContext;
33  import org.apache.tika.parser.Parser;
34  import org.apache.tika.sax.XHTMLContentHandler;
35  
36  import org.xml.sax.Attributes;
37  import org.xml.sax.ContentHandler;
38  import org.xml.sax.helpers.DefaultHandler;
39  
40  public class MboxParserTest extends TestCase {
41  
42      public void testSimple() {
43          Parser parser = new MboxParser();
44          Metadata metadata = new Metadata();
45          InputStream stream = getStream("test-documents/simple.mbox");
46          ContentHandler handler = mock(DefaultHandler.class);
47  
48          try {
49              parser.parse(stream, handler, metadata, new ParseContext());
50              verify(handler).startDocument();
51              verify(handler, times(2)).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
52              verify(handler, times(2)).endElement(XHTMLContentHandler.XHTML, "p", "p");
53              verify(handler).characters(new String("Test content 1").toCharArray(), 0, 14);
54              verify(handler).characters(new String("Test content 2").toCharArray(), 0, 14);
55              verify(handler).endDocument();
56          } catch (Exception e) {
57              fail("Exception thrown: " + e.getMessage());
58          }
59      }
60  
61      public void testHeaders() {
62          Parser parser = new MboxParser();
63          Metadata metadata = new Metadata();
64          InputStream stream = getStream("test-documents/headers.mbox");
65          ContentHandler handler = mock(DefaultHandler.class);
66  
67          try {
68              parser.parse(stream, handler, metadata, new ParseContext());
69  
70              verify(handler).startDocument();
71              verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
72              verify(handler).characters(new String("Test content").toCharArray(), 0, 12);
73              verify(handler).endDocument();
74  
75              assertEquals("subject", metadata.get(Metadata.TITLE));
76              assertEquals("subject", metadata.get(Metadata.SUBJECT));
77              assertEquals("<author@domain.com>", metadata.get(Metadata.AUTHOR));
78              assertEquals("<author@domain.com>", metadata.get(Metadata.CREATOR));
79              assertEquals("<name@domain.com>", metadata.get("MboxParser-return-path"));
80              assertEquals("Tue, 9 Jun 2009 23:58:45 -0400", metadata.get(Metadata.DATE));
81          } catch (Exception e) {
82              fail("Exception thrown: " + e.getMessage());
83          }
84      }
85  
86      public void testMultilineHeader() {
87          Parser parser = new MboxParser();
88          Metadata metadata = new Metadata();
89          InputStream stream = getStream("test-documents/multiline.mbox");
90          ContentHandler handler = mock(DefaultHandler.class);
91  
92          try {
93              parser.parse(stream, handler, metadata, new ParseContext());
94  
95              verify(handler).startDocument();
96              verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
97              verify(handler).characters(new String("Test content").toCharArray(), 0, 12);
98              verify(handler).endDocument();
99  
100             assertEquals("from xxx by xxx with xxx; date", metadata.get("MboxParser-received"));
101         } catch (Exception e) {
102             fail("Exception thrown: " + e.getMessage());
103         }
104     }
105 
106     public void testQuoted() {
107         Parser parser = new MboxParser();
108         Metadata metadata = new Metadata();
109         InputStream stream = getStream("test-documents/quoted.mbox");
110         ContentHandler handler = mock(DefaultHandler.class);
111 
112         try {
113             parser.parse(stream, handler, metadata, new ParseContext());
114 
115             verify(handler).startDocument();
116             verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
117             verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("q"), eq("q"), any(Attributes.class));
118             verify(handler).endElement(eq(XHTMLContentHandler.XHTML), eq("q"), eq("q"));
119             verify(handler).endElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"));
120             verify(handler).characters(new String("Test content").toCharArray(), 0, 12);
121             verify(handler).characters(new String("> quoted stuff").toCharArray(), 0, 14);
122             verify(handler).endDocument();
123         } catch (Exception e) {
124             fail("Exception thrown: " + e.getMessage());
125         }
126     }
127 
128     public void testComplex() {
129         Parser parser = new MboxParser();
130         Metadata metadata = new Metadata();
131         InputStream stream = getStream("test-documents/complex.mbox");
132         ContentHandler handler = mock(DefaultHandler.class);
133 
134         try {
135             parser.parse(stream, handler, metadata, new ParseContext());
136 
137             verify(handler).startDocument();
138             verify(handler, times(3)).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
139             verify(handler, times(3)).endElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"));
140             verify(handler, times(3)).startElement(eq(XHTMLContentHandler.XHTML), eq("q"), eq("q"), any(Attributes.class));
141             verify(handler, times(3)).endElement(eq(XHTMLContentHandler.XHTML), eq("q"), eq("q"));
142             verify(handler).endDocument();
143         } catch (Exception e) {
144             fail("Exception thrown: " + e.getMessage());
145         }
146     }
147 
148     private static InputStream getStream(String name) {
149         return Thread.currentThread().getContextClassLoader()
150         .getResourceAsStream(name);
151     }
152 
153 
154 }