1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.sax;
18
19 import org.apache.tika.metadata.Metadata;
20 import org.xml.sax.ContentHandler;
21 import org.xml.sax.SAXException;
22
23 import junit.framework.TestCase;
24
25
26
27
28 public class XHTMLContentHandlerTest extends TestCase {
29
30 private ContentHandler output;
31
32 private XHTMLContentHandler xhtml;
33
34 protected void setUp() {
35 output = new BodyContentHandler();
36 xhtml = new XHTMLContentHandler(output, new Metadata());
37 }
38
39
40
41
42
43
44
45 public void testExtraWhitespace() throws SAXException {
46 xhtml.startDocument();
47
48 xhtml.element("p", "foo");
49 xhtml.startElement("p");
50 xhtml.characters("b");
51 xhtml.element("b", "a");
52 xhtml.characters("r");
53 xhtml.endElement("p");
54
55 xhtml.startElement("table");
56 xhtml.startElement("tr");
57 xhtml.element("th", "x");
58 xhtml.element("th", "y");
59 xhtml.endElement("tr");
60 xhtml.startElement("tr");
61 xhtml.element("td", "a");
62 xhtml.element("td", "b");
63 xhtml.endElement("tr");
64 xhtml.endElement("table");
65 xhtml.endDocument();
66
67 String[] words = output.toString().split("\\s+");
68 assertEquals(6, words.length);
69 assertEquals("foo", words[0]);
70 assertEquals("bar", words[1]);
71 assertEquals("x", words[2]);
72 assertEquals("y", words[3]);
73 assertEquals("a", words[4]);
74 assertEquals("b", words[5]);
75 }
76
77 }