public class LuceneArticleIndex extends ArticleIndex
| Modifier and Type | Field and Description |
|---|---|
protected org.apache.lucene.analysis.Analyzer |
analyzer |
protected static String |
FIELD_DESCRIPTION |
protected static String |
FIELD_NAMES |
protected static String |
FIELD_REFIDS |
protected static String |
FIELD_REFIDS_FREEBASE |
protected static String |
FIELD_REFIDS_ICD10 |
protected static String |
FIELD_REFIDS_UMLS |
protected static String |
FIELD_REFIDS_WIKIDATA |
protected static String |
FIELD_REFIDS_WIKIPEDIA |
protected static String |
FIELD_REFURLS_WIKIPEDIA |
protected static String |
FIELD_TERMS |
protected static String |
FIELD_TEXT |
protected static String |
FIELD_TITLE |
protected static String |
FIELD_TYPE |
protected static String |
FIELD_VECTOR |
protected static org.slf4j.Logger |
log |
protected static int |
NUM_BM25_CANDIDATES |
protected static String |
PARAM_FUZZY |
protected static String |
PARAM_PROXIMITY |
protected org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess |
preprocessor |
protected org.apache.lucene.index.IndexReader |
reader |
protected org.apache.lucene.search.IndexSearcher |
searcher |
protected info.debatty.java.stringsimilarity.interfaces.NormalizedStringSimilarity |
sim |
| Constructor and Description |
|---|
LuceneArticleIndex() |
| Modifier and Type | Method and Description |
|---|---|
protected void |
addNameField(org.apache.lucene.document.Document doc,
String name,
String value)
A NameField is a tokenized lowercase field in Lucene.
|
protected void |
addStringField(org.apache.lucene.document.Document doc,
String name,
String value)
A StringField is a non-tokenized field in Lucene.
|
protected void |
addTextField(org.apache.lucene.document.Document doc,
String name,
String value,
org.apache.lucene.document.Field.Store store)
A TextField is a tokenized field in Lucene.
|
protected void |
addVectorField(org.apache.lucene.document.Document doc,
String name,
org.nd4j.linalg.api.ndarray.INDArray arr)
A VectorField is a compressed INDArray as Base64 encoded string.
|
protected org.apache.lucene.analysis.Analyzer |
buildAnalyzer() |
void |
createIndex(Iterator<? extends Article> articles,
org.apache.lucene.store.Directory index) |
void |
createIndexDirectory(Iterator<? extends Article> articles,
Resource cacheDir) |
void |
createIndexRAM(Iterator<? extends Article> articles)
Creates and opens a new index in local RAM
|
protected org.apache.lucene.document.Document |
createLuceneDocument(WikiDataArticle article) |
protected Article |
createWikidataArticle(org.apache.lucene.document.Document doc) |
protected ArticleRef |
createWikidataArticleRef(org.apache.lucene.document.Document doc) |
protected String |
decodeWikiUrl(String url) |
Collection<String> |
getAllArticleIDs() |
Collection<String> |
getAllArticleNames() |
Collection<String> |
getAllArticleTerms() |
Collection<String> |
getAllArticleTitles() |
Collection<String> |
getAllArticleURLs() |
protected Collection<String> |
getAllFields(String field) |
boolean |
openIndex(Resource path)
Tries to open an existing index from the given path.
|
Optional<ArticleRef> |
queryID(String id)
Retrieve the article with a given ID
|
protected Optional<org.apache.lucene.document.Document> |
queryIndexID(String field,
String id) |
protected List<org.apache.lucene.document.Document> |
queryIndexNames(String name,
int hits) |
protected List<ArticleRef> |
queryIndexPrefix(String prefix,
int hits) |
List<ArticleRef> |
queryNames(String name,
int hits)
Retrieve candidates for a query on the "name" field.
|
List<ArticleRef> |
queryPrefixNames(String prefix,
int hits)
Retrieve candidates for auto completion on the "name" field.
|
Optional<ArticleRef> |
queryWikidataID(String id)
Retrieve the article with a given Wikidata ID
|
Optional<ArticleRef> |
queryWikipediaPage(String name)
Retrieve the article with a given name
|
Optional<ArticleRef> |
queryWikipediaURL(String url)
Retrieve the article with a given URL
|
protected String |
splitString(String name,
String suffix) |
protected static final org.slf4j.Logger log
protected static final String FIELD_TITLE
protected static final String FIELD_TYPE
protected static final String FIELD_REFIDS
protected static final String FIELD_REFIDS_WIKIDATA
protected static final String FIELD_REFIDS_WIKIPEDIA
protected static final String FIELD_REFIDS_FREEBASE
protected static final String FIELD_REFIDS_UMLS
protected static final String FIELD_REFIDS_ICD10
protected static final String FIELD_REFURLS_WIKIPEDIA
protected static final String FIELD_TEXT
protected static final String FIELD_DESCRIPTION
protected static final String FIELD_NAMES
protected static final String FIELD_TERMS
protected static final String FIELD_VECTOR
protected static final String PARAM_PROXIMITY
protected static final String PARAM_FUZZY
protected static final int NUM_BM25_CANDIDATES
protected org.apache.lucene.index.IndexReader reader
protected org.apache.lucene.search.IndexSearcher searcher
protected org.apache.lucene.analysis.Analyzer analyzer
protected final info.debatty.java.stringsimilarity.interfaces.NormalizedStringSimilarity sim
protected final org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess preprocessor
public boolean openIndex(Resource path)
public void createIndexRAM(Iterator<? extends Article> articles)
public void createIndexDirectory(Iterator<? extends Article> articles, Resource cacheDir) throws IOException
Throws: IOException
public void createIndex(Iterator<? extends Article> articles, org.apache.lucene.store.Directory index)
public List<ArticleRef> queryNames(String name, int hits)
queryNames in class ArticleIndex
name - The name to search, e.g. "obama barack"
hits - Max number of hits to generate
public List<ArticleRef> queryPrefixNames(String prefix, int hits)
queryPrefixNames in class ArticleIndex
prefix -
hits -
public Optional<ArticleRef> queryID(String id)
queryID in class ArticleIndex
id - The indexed ID, e.g. "Q64"
public Optional<ArticleRef> queryWikidataID(String id)
id - The Wikidata ID, e.g. "Q64"
public Optional<ArticleRef> queryWikipediaURL(String url)
public Optional<ArticleRef> queryWikipediaPage(String name)
public Collection<String> getAllArticleTitles()
public Collection<String> getAllArticleNames()
public Collection<String> getAllArticleTerms()
public Collection<String> getAllArticleURLs()
public Collection<String> getAllArticleIDs()
protected Collection<String> getAllFields(String field)
protected org.apache.lucene.document.Document createLuceneDocument(WikiDataArticle article)
protected ArticleRef createWikidataArticleRef(org.apache.lucene.document.Document doc)
protected Article createWikidataArticle(org.apache.lucene.document.Document doc)
protected void addTextField(org.apache.lucene.document.Document doc,
String name,
String value,
org.apache.lucene.document.Field.Store store)
protected void addStringField(org.apache.lucene.document.Document doc,
String name,
String value)
protected void addNameField(org.apache.lucene.document.Document doc,
String name,
String value)
protected void addVectorField(org.apache.lucene.document.Document doc,
String name,
org.nd4j.linalg.api.ndarray.INDArray arr)
protected org.apache.lucene.analysis.Analyzer buildAnalyzer()
protected List<org.apache.lucene.document.Document> queryIndexNames(String name, int hits)
protected List<ArticleRef> queryIndexPrefix(String prefix, int hits)
Copyright © 2019. All rights reserved.