1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.microsoft;
18
19 import java.io.InputStream;
20 import java.util.regex.Matcher;
21 import java.util.regex.Pattern;
22
23 import junit.framework.TestCase;
24
25 import org.apache.tika.metadata.Metadata;
26 import org.apache.tika.parser.AutoDetectParser;
27 import org.apache.tika.parser.Parser;
28 import org.apache.tika.sax.BodyContentHandler;
29 import org.xml.sax.ContentHandler;
30
31
32
33
34 public class OutlookParserTest extends TestCase {
35
36 public void testOutlookParsing() throws Exception {
37 Parser parser = new AutoDetectParser();
38 ContentHandler handler = new BodyContentHandler();
39 Metadata metadata = new Metadata();
40
41 InputStream stream = OutlookParserTest.class.getResourceAsStream(
42 "/test-documents/test-outlook.msg");
43 try {
44 parser.parse(stream, handler, metadata);
45 } finally {
46 stream.close();
47 }
48
49 assertEquals(
50 "application/vnd.ms-outlook",
51 metadata.get(Metadata.CONTENT_TYPE));
52 assertEquals(
53 "Microsoft Outlook Express 6",
54 metadata.get(Metadata.TITLE));
55
56
57
58
59
60 String content = handler.toString();
61 assertTrue(content.contains("Microsoft Outlook Express 6"));
62
63 assertTrue(content.contains("Nouvel utilisateur de Outlook Express"));
64 assertTrue(content.contains("Messagerie et groupes de discussion"));
65 }
66
67
68
69
70
71
72 public void testMultipleCopies() throws Exception {
73 Parser parser = new AutoDetectParser();
74 ContentHandler handler = new BodyContentHandler();
75 Metadata metadata = new Metadata();
76
77 InputStream stream = OutlookParserTest.class.getResourceAsStream(
78 "/test-documents/testMSG.msg");
79 try {
80 parser.parse(stream, handler, metadata);
81 } finally {
82 stream.close();
83 }
84
85 assertEquals(
86 "application/vnd.ms-outlook",
87 metadata.get(Metadata.CONTENT_TYPE));
88
89 String content = handler.toString();
90 Pattern pattern = Pattern.compile("From");
91 Matcher matcher = pattern.matcher(content);
92 assertTrue(matcher.find());
93 assertFalse(matcher.find());
94 }
95
96
97
98
99
100
101 public void testOutlookNew() throws Exception {
102 Parser parser = new AutoDetectParser();
103 ContentHandler handler = new BodyContentHandler();
104 Metadata metadata = new Metadata();
105
106 InputStream stream = OutlookParserTest.class.getResourceAsStream(
107 "/test-documents/test-outlook2003.msg");
108 try {
109 parser.parse(stream, handler, metadata);
110 } finally {
111 stream.close();
112 }
113
114 assertEquals(
115 "application/vnd.ms-outlook",
116 metadata.get(Metadata.CONTENT_TYPE));
117 assertEquals(
118 "Welcome to Microsoft Office Outlook 2003",
119 metadata.get(Metadata.TITLE));
120
121 String content = handler.toString();
122 assertTrue(content.contains("Outlook 2003"));
123 assertTrue(content.contains("Streamlined Mail Experience"));
124 assertTrue(content.contains("Navigation Pane"));
125 }
126
127 }