|
|||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
java.util.Observable
org.exist.storage.TextSearchEngine
org.exist.storage.NativeTextEngine
public class NativeTextEngine
This class is responsible for fulltext-indexing. Text-nodes are handed over
to this class to be fulltext-indexed. Method storeText() is called by
RelationalBroker whenever it finds a TextNode. Method getNodesContaining()
is used by the XPath-engine to process queries where a fulltext-operator is
involved. The class keeps two database tables: table dbTokens stores the
words found with their unique id. Table invertedIndex contains the word
occurrences for every word-id per document.
TODO: store node type (attribute or text) with each entry
Field Summary | |
---|---|
static int |
ATTRIBUTE_BY_QNAME
|
static int |
ATTRIBUTE_NOT_BY_QNAME
|
static byte |
ATTRIBUTE_SECTION
|
static double |
DEFAULT_WORD_CACHE_GROWTH
|
static double |
DEFAULT_WORD_KEY_THRESHOLD
|
static double |
DEFAULT_WORD_VALUE_THRESHOLD
|
static int |
DO_NOT_TOKENIZE
|
static String |
FILE_KEY_IN_CONFIG
|
static String |
FILE_NAME
|
static int |
FOURTH_OPTION
|
static int |
LENGTH_NODE_IDS_FREQ_OFFSETS
|
static int |
LENGTH_NODE_TYPE
|
static int |
MAX_TOKEN_LENGTH
Length limit for the tokens |
static int |
OFFSET_ATTRIBUTE_DLN_LENGTH
|
static int |
OFFSET_DLN
|
static int |
OFFSET_ELEMENT_CHILDREN_COUNT
|
static int |
OFFSET_NODE_TYPE
|
static int |
OFFSET_TEXT_DLN_LENGTH
|
static byte |
QNAME_SECTION
|
static int |
TEXT_BY_QNAME
|
static byte |
TEXT_SECTION
|
static int |
TOKENIZE
|
Fields inherited from class org.exist.storage.TextSearchEngine |
---|
CONFIGURATION_STOPWORDS_ELEMENT_NAME, INDEX_NUMBERS_ATTRIBUTE, PROPERTY_INDEX_NUMBERS, PROPERTY_STEM, PROPERTY_STOPWORD_FILE, PROPERTY_STORE_TERM_FREQUENCY, PROPERTY_TOKENIZER, STEM_ATTRIBUTE, STOPWORD_FILE_ATTRIBUTE, STORE_TERM_FREQUENCY_ATTRIBUTE, TOKENIZER_ATTRIBUTE |
Constructor Summary | |
---|---|
NativeTextEngine(DBBroker broker,
BFile dbFile,
Configuration config)
|
Method Summary | |
---|---|
boolean |
close()
|
void |
closeAndRemove()
|
static boolean |
containsWildcards(String str)
Checks if the given string could be a regular expression. |
void |
dropIndex(Collection collection)
Remove index entries for an entire collection. |
void |
dropIndex(DocumentImpl document)
Remove all index entries for the given document. |
void |
flush()
|
String |
getConfigKeyForFile()
|
String |
getFileName()
|
String[] |
getIndexTerms(DocumentSet docs,
TermMatcher matcher)
|
NativeTextEngine |
getInstance()
|
NodeSet |
getNodes(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
TermMatcher matcher,
CharSequence startTerm)
|
NodeSet |
getNodesContaining(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
String expr,
int type,
boolean matchAll)
For each of the given search terms and each of the documents in the document set, return a node-set of matching nodes. |
NodeSet |
getNodesExact(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
String expr)
Get all nodes whose content exactly matches the given expression. |
int |
getTrackMatches()
|
void |
printStatistics()
|
void |
remove()
Remove all pending modifications for the current document. |
void |
removeNode(StoredNode node,
NodePath currentPath,
String content)
The given node is being removed from the database. |
Occurrences[] |
scanIndexTerms(DocumentSet docs,
NodeSet contextSet,
QName[] qnames,
String start,
String end)
|
Occurrences[] |
scanIndexTerms(DocumentSet docs,
NodeSet contextSet,
String start,
String end)
Queries the fulltext index to retrieve information on indexed words contained in the index for the current collection. |
void |
setDocument(DocumentImpl document)
Set the current document; generally called before calling an operation. |
void |
setTrackMatches(int flags)
|
static boolean |
startsWithWildcard(String str)
|
void |
storeAttribute(AttrImpl node,
NodePath currentPath,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
Indexes the tokens contained in an attribute. |
void |
storeAttribute(AttrImpl node,
NodePath currentPath,
int indexingHint,
RangeIndexSpec idx,
boolean remove)
Store and index the given attribute. |
void |
storeText(StoredNode parent,
ElementContent text,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
|
void |
storeText(TextImpl node,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
Indexes the tokens contained in a text node. |
void |
storeText(TextImpl node,
NodePath currentPath,
int indexingHint)
Store and index the given text node. |
void |
sync()
Triggers a cache sync. |
String |
toString()
|
Methods inherited from class org.exist.storage.TextSearchEngine |
---|
getNodesContaining, getTokenizer |
Methods inherited from class java.util.Observable |
---|
addObserver, countObservers, deleteObserver, deleteObservers, hasChanged, notifyObservers, notifyObservers |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
public static final String FILE_NAME
public static final String FILE_KEY_IN_CONFIG
public static final double DEFAULT_WORD_CACHE_GROWTH
public static final double DEFAULT_WORD_KEY_THRESHOLD
public static final double DEFAULT_WORD_VALUE_THRESHOLD
public static final byte TEXT_SECTION
public static final byte ATTRIBUTE_SECTION
public static final byte QNAME_SECTION
public static int ATTRIBUTE_BY_QNAME
public static int ATTRIBUTE_NOT_BY_QNAME
public static int TOKENIZE
public static int DO_NOT_TOKENIZE
public static int TEXT_BY_QNAME
public static int FOURTH_OPTION
public static final int LENGTH_NODE_TYPE
public static final int LENGTH_NODE_IDS_FREQ_OFFSETS
public static final int OFFSET_NODE_TYPE
public static final int OFFSET_ELEMENT_CHILDREN_COUNT
public static final int OFFSET_ATTRIBUTE_DLN_LENGTH
public static final int OFFSET_TEXT_DLN_LENGTH
public static final int OFFSET_DLN
public static final int MAX_TOKEN_LENGTH
Constructor Detail |
---|
public NativeTextEngine(DBBroker broker, BFile dbFile, Configuration config) throws DBException
DBException
Method Detail |
---|
public String getFileName()
public String getConfigKeyForFile()
public NativeTextEngine getInstance()
public static final boolean containsWildcards(String str)
str - The string
public static final boolean startsWithWildcard(String str)
public int getTrackMatches()
getTrackMatches
in class TextSearchEngine
public void setTrackMatches(int flags)
setTrackMatches
in class TextSearchEngine
public void setDocument(DocumentImpl document)
ContentLoadingObserver
setDocument
in interface ContentLoadingObserver
public void storeAttribute(AttrImpl node, NodePath currentPath, int indexingHint, FulltextIndexSpec indexSpec, boolean remove)
node - The attribute to be indexed
public void storeAttribute(AttrImpl node, NodePath currentPath, int indexingHint, RangeIndexSpec idx, boolean remove)
ContentLoadingObserver
storeAttribute
in interface ContentLoadingObserver
public void storeText(TextImpl node, int indexingHint, FulltextIndexSpec indexSpec, boolean remove)
storeText
in class TextSearchEngine
indexSpec - The index configuration
node - The text node to be indexed
indexingHint - if true, given text is indexed as a single token; if false, it is tokenized before being indexed
public void storeText(StoredNode parent, ElementContent text, int indexingHint, FulltextIndexSpec indexSpec, boolean remove)
storeText
in class TextSearchEngine
public void storeText(TextImpl node, NodePath currentPath, int indexingHint)
ContentLoadingObserver
storeText
in interface ContentLoadingObserver
public void removeNode(StoredNode node, NodePath currentPath, String content)
ContentLoadingObserver
removeNode
in interface ContentLoadingObserver
public void sync()
ContentLoadingObserver
sync
in interface ContentLoadingObserver
public void flush()
flush
in interface ContentLoadingObserver
flush
in class TextSearchEngine
public void remove()
ContentLoadingObserver
remove
in interface ContentLoadingObserver
public void dropIndex(Collection collection)
TextSearchEngine
dropIndex
in interface ContentLoadingObserver
dropIndex
in class TextSearchEngine
public void dropIndex(DocumentImpl document)
TextSearchEngine
dropIndex
in interface ContentLoadingObserver
dropIndex
in class TextSearchEngine
public NodeSet getNodesContaining(XQueryContext context, DocumentSet docs, NodeSet contextSet, int axis, QName qname, String expr, int type, boolean matchAll) throws TerminatedException
TextSearchEngine
getNodesContaining
in class TextSearchEngine
TerminatedException
public NodeSet getNodesExact(XQueryContext context, DocumentSet docs, NodeSet contextSet, int axis, QName qname, String expr) throws TerminatedException
TerminatedException
public NodeSet getNodes(XQueryContext context, DocumentSet docs, NodeSet contextSet, int axis, QName qname, TermMatcher matcher, CharSequence startTerm) throws TerminatedException
getNodes
in class TextSearchEngine
TerminatedException
public String[] getIndexTerms(DocumentSet docs, TermMatcher matcher)
getIndexTerms
in class TextSearchEngine
public Occurrences[] scanIndexTerms(DocumentSet docs, NodeSet contextSet, String start, String end) throws PermissionDeniedException
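TextSearchEngine
Queries the fulltext index to retrieve information on indexed words
contained in the index for the current collection. Returns Occurrences for
all words contained in the index. If param end is null, all words starting
with the string sequence param start are returned. Otherwise, the method
returns all words that come after start and before end in lexical order.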
scanIndexTerms
in class TextSearchEngine
PermissionDeniedException
public Occurrences[] scanIndexTerms(DocumentSet docs, NodeSet contextSet, QName[] qnames, String start, String end) throws PermissionDeniedException
scanIndexTerms
in class TextSearchEngine
PermissionDeniedException
public void closeAndRemove()
closeAndRemove
in interface ContentLoadingObserver
public boolean close() throws DBException
close
in interface ContentLoadingObserver
close
in class TextSearchEngine
DBException
public void printStatistics()
printStatistics
in interface ContentLoadingObserver
public String toString()
toString
in class Object
|
|||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |