public class MalletTopicModeling extends Object implements ITopicModeling
| Constructor and Description |
|---|
MalletTopicModeling() |
| Modifier and Type | Method and Description |
|---|---|
static double |
computeSimilarity(double[] vectorA,
double[] vectorB) |
String |
getId(org.apache.uima.jcas.JCas aJCas) |
cc.mallet.types.TokenSequence |
getLemmata(org.apache.uima.jcas.JCas aJCas) |
Object[] |
getVocabulary(Model model) |
Map<String,List<Topic>> |
inferLabel(Document doc,
Model model,
org.apache.commons.configuration2.XMLConfiguration xmlConfig)
The argument topicWords has to be passed here for performance reasons; this object is
available in every model.malletModel via the method getTopWords().
|
Map<String,List<Topic>> |
inferLabel(org.apache.uima.jcas.JCas cas,
Model model,
org.apache.commons.configuration2.XMLConfiguration xmlConfig)
The argument topicWords has to be passed here for performance reasons; this object is
available in every model.malletModel via the method getTopWords().
|
boolean |
isNotNum(String lemmaString)
Filters out simple numbers that do not carry any real semantics.
|
boolean |
isNotPunctuation(String lemmaString) |
List<cc.mallet.types.TokenSequence> |
jcorePreprocess(List<Document> docs) |
org.apache.commons.configuration2.XMLConfiguration |
loadConfig(String configFile) |
cc.mallet.types.InstanceList |
malletPreprocess(List<cc.mallet.types.TokenSequence> data) |
void |
mapMalletIdToPubmedId(List<Document> docs,
Model model) |
void |
mapPubmedIdToMalletId(List<Document> docs,
Model model) |
cc.mallet.types.InstanceList |
preprocess(List<Document> docs) |
List<Document> |
readDocuments(File file,
org.apache.commons.configuration2.XMLConfiguration xmlConfig) |
Model |
readMalletModel(File file) |
Model |
readModel(String filename) |
List<Document> |
readXmiDb(MalletTopicModeling tm,
org.apache.commons.configuration2.HierarchicalConfiguration<org.apache.commons.configuration2.tree.ImmutableNode> configuration) |
void |
saveMalletModel(Model model,
File file) |
void |
saveModel(Model model,
String filename) |
TMSearchResult |
search(Document query,
Model model,
org.apache.commons.configuration2.XMLConfiguration xmlConfig) |
TMSearchResult |
searchIndexOnly(Document query,
Model model,
org.apache.commons.configuration2.XMLConfiguration xmlConfig) |
TMSearchResult |
searchModelOnly(Document query,
Model model,
org.apache.commons.configuration2.XMLConfiguration xmlConfig) |
Model |
train(cc.mallet.types.InstanceList instances,
org.apache.commons.configuration2.XMLConfiguration xmlConfig) |
Model |
train(List<Document> docs,
org.apache.commons.configuration2.XMLConfiguration xmlConfig) |
public static double computeSimilarity(double[] vectorA,
double[] vectorB)
public org.apache.commons.configuration2.XMLConfiguration loadConfig(String configFile) throws org.apache.commons.configuration2.ex.ConfigurationException
Throws: org.apache.commons.configuration2.ex.ConfigurationException
public Model train(List<Document> docs, org.apache.commons.configuration2.XMLConfiguration xmlConfig)
Specified by: train in interface ITopicModeling
public Model train(cc.mallet.types.InstanceList instances, org.apache.commons.configuration2.XMLConfiguration xmlConfig)
public void saveModel(Model model, String filename)
Specified by: saveModel in interface ITopicModeling
public List<Document> readDocuments(File file, org.apache.commons.configuration2.XMLConfiguration xmlConfig)
Specified by: readDocuments in interface ITopicModeling
public List<Document> readXmiDb(MalletTopicModeling tm, org.apache.commons.configuration2.HierarchicalConfiguration<org.apache.commons.configuration2.tree.ImmutableNode> configuration)
public TMSearchResult search(Document query, Model model, org.apache.commons.configuration2.XMLConfiguration xmlConfig)
Specified by: search in interface ITopicModeling
public TMSearchResult searchModelOnly(Document query, Model model, org.apache.commons.configuration2.XMLConfiguration xmlConfig)
public TMSearchResult searchIndexOnly(Document query, Model model, org.apache.commons.configuration2.XMLConfiguration xmlConfig)
public Map<String,List<Topic>> inferLabel(Document doc, Model model, org.apache.commons.configuration2.XMLConfiguration xmlConfig)
Specified by: inferLabel in interface ITopicModeling
public Map<String,List<Topic>> inferLabel(org.apache.uima.jcas.JCas cas, Model model, org.apache.commons.configuration2.XMLConfiguration xmlConfig)
public Model readModel(String filename)
Specified by: readModel in interface ITopicModeling
public cc.mallet.types.InstanceList malletPreprocess(List<cc.mallet.types.TokenSequence> data)
public cc.mallet.types.TokenSequence getLemmata(org.apache.uima.jcas.JCas aJCas)
public String getId(org.apache.uima.jcas.JCas aJCas)
public boolean isNotNum(String lemmaString)
public boolean isNotPunctuation(String lemmaString)
Copyright © 2018 JULIE Lab, Germany. All rights reserved.