org.apache.tika.parser.html
Class HtmlParserTest

java.lang.Object
  extended by TestCase
      extended by org.apache.tika.parser.html.HtmlParserTest

public class HtmlParserTest
extends TestCase


Constructor Summary
HtmlParserTest()
           
 
Method Summary
 void testBaseHref()
          Test case for TIKA-287
 void testCharactersDirectlyUnderBodyElement()
          Test case for TIKA-210
 void testDetectOfCharset()
          Test case for TIKA-334
 void testHttpEquivCharset()
          Test case for TIKA-332
 void testHttpEquivCharsetFunkyAttributes()
          Test case for TIKA-349
 void testIgnoreCharsetDetectorLanguage()
          Test case for TIKA-339: Don't use language returned by CharsetDetector
 void testLineBreak()
          Test case for HTML content like "<div>foo<br>bar</div>baz" that should result in three whitespace-separated tokens "foo", "bar" and "baz" instead of a single token "foobarbaz".
 void testMetaHttpEquivWithLotsOfPreambleText()
          Test case for TIKA-357
 void testParseAscii()
           
 void testParseEmpty()
           
 void testUsingCharsetInContentTypeHeader()
          Test case for TIKA-341
 void testUsingFunkyCharsetInContentTypeHeader()
          Test case for TIKA-350
 void testWhitespaceBetweenTableCells()
          Test case for TIKA-268
 void testXhtmlParsing()
           
 void XtestParseUTF8()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

HtmlParserTest

public HtmlParserTest()
Method Detail

testParseAscii

public void testParseAscii()
                    throws java.lang.Exception
Throws:
java.lang.Exception

XtestParseUTF8

public void XtestParseUTF8()
                    throws java.io.IOException,
                           org.xml.sax.SAXException,
                           org.apache.tika.exception.TikaException
Throws:
java.io.IOException
org.xml.sax.SAXException
org.apache.tika.exception.TikaException

testXhtmlParsing

public void testXhtmlParsing()
                      throws java.lang.Exception
Throws:
java.lang.Exception

testParseEmpty

public void testParseEmpty()
                    throws java.lang.Exception
Throws:
java.lang.Exception

testCharactersDirectlyUnderBodyElement

public void testCharactersDirectlyUnderBodyElement()
                                            throws java.lang.Exception
Test case for TIKA-210

Throws:
java.lang.Exception
See Also:
TIKA-210

testBaseHref

public void testBaseHref()
                  throws java.lang.Exception
Test case for TIKA-287

Throws:
java.lang.Exception
See Also:
TIKA-287

testWhitespaceBetweenTableCells

public void testWhitespaceBetweenTableCells()
                                     throws java.lang.Exception
Test case for TIKA-268

Throws:
java.lang.Exception
See Also:
TIKA-268

testHttpEquivCharset

public void testHttpEquivCharset()
                          throws java.lang.Exception
Test case for TIKA-332

Throws:
java.lang.Exception
See Also:
TIKA-332

testDetectOfCharset

public void testDetectOfCharset()
                         throws java.lang.Exception
Test case for TIKA-334

Throws:
java.lang.Exception
See Also:
TIKA-334

testUsingCharsetInContentTypeHeader

public void testUsingCharsetInContentTypeHeader()
                                         throws java.lang.Exception
Test case for TIKA-341

Throws:
java.lang.Exception
See Also:
TIKA-341

testLineBreak

public void testLineBreak()
                   throws java.lang.Exception
Test case for HTML content like "<div>foo<br>bar</div>baz" that should result in three whitespace-separated tokens "foo", "bar" and "baz" instead of a single token "foobarbaz".

Throws:
java.lang.Exception
See Also:
TIKA-343

testIgnoreCharsetDetectorLanguage

public void testIgnoreCharsetDetectorLanguage()
                                       throws java.lang.Exception
Test case for TIKA-339: Don't use language returned by CharsetDetector

Throws:
java.lang.Exception
See Also:
TIKA-339

testHttpEquivCharsetFunkyAttributes

public void testHttpEquivCharsetFunkyAttributes()
                                         throws java.lang.Exception
Test case for TIKA-349

Throws:
java.lang.Exception
See Also:
TIKA-349

testUsingFunkyCharsetInContentTypeHeader

public void testUsingFunkyCharsetInContentTypeHeader()
                                              throws java.lang.Exception
Test case for TIKA-350

Throws:
java.lang.Exception
See Also:
TIKA-350

testMetaHttpEquivWithLotsOfPreambleText

public void testMetaHttpEquivWithLotsOfPreambleText()
                                             throws java.lang.Exception
Test case for TIKA-357

Throws:
java.lang.Exception
See Also:
TIKA-357


Copyright © 2007-2010 Apache Software Foundation. All Rights Reserved.