1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.tika.parser.microsoft.ooxml; 18 19 import java.io.IOException; 20 21 import org.apache.poi.POIXMLDocument; 22 import org.apache.tika.metadata.Metadata; 23 import org.apache.xmlbeans.XmlException; 24 import org.xml.sax.ContentHandler; 25 import org.xml.sax.SAXException; 26 27 /** 28 * Interface implemented by all Tika OOXML extractors. 29 * 30 * @see org.apache.poi.POIXMLTextExtractor 31 */ 32 public interface OOXMLExtractor { 33 34 /** 35 * Returns the opened document. 36 * 37 * @see POIXMLTextExtractor#getDocument() 38 */ 39 POIXMLDocument getDocument(); 40 41 /** 42 * {@link POIXMLTextExtractor#getMetadataTextExtractor()} not yet supported 43 * for OOXML by POI. 44 */ 45 MetadataExtractor getMetadataExtractor(); 46 47 /** 48 * Parses the document into a sequence of XHTML SAX events sent to the 49 * given content handler. 50 */ 51 void getXHTML(ContentHandler handler, Metadata metadata) 52 throws SAXException, XmlException, IOException; 53 }