public final class TextProcessor extends Object
| Constructor and Description |
|---|
TextProcessor()
Constructor.
|
| Modifier and Type | Method and Description |
|---|---|
double |
augmentedTF(WordBag bag,
String term)
Compute TF (term frequency) metrics.
|
String |
bagsToTable(List<WordBag> bags) |
int |
classify(String classKey,
double[] series,
HashMap<String,HashMap<String,Double>> tfidf,
Params params) |
int |
classify(String trueClassLabel,
WordBag test,
HashMap<String,HashMap<String,Double>> tfidf) |
String |
classify(WordBag test,
HashMap<String,HashMap<String,Double>> tfidf) |
HashMap<String,HashMap<String,Double>> |
computeTFIDF(Collection<WordBag> texts)
Computes TF*IDF values.
|
double |
cosineDistance(HashMap<String,Double> map1,
HashMap<String,Double> map2)
Computes a cosine similarity.
|
double |
cosineSimilarity(WordBag testSample,
HashMap<String,Double> weightVector) |
double |
cosineSimilarityInstrumented(WordBag testSample,
HashMap<String,Double> weightVector,
HashMap<String,Double> insight) |
int |
df(HashMap<String,WordBag> bags,
String string)
Compute document frequency, DF, metrics.
|
double |
dotProduct(double[] vector1,
double[] vector2)
Compute the dot product of two vectors.
|
double |
dotProduct(Double[] vector1,
Double[] vector2)
Compute the dot product of two vectors.
|
CosineDistanceMatrix |
getCosineDistanceMatrix(HashMap<String,HashMap<String,Double>> tfidf) |
double |
idf(HashMap<String,WordBag> bags,
String string)
Compute idf (inverse document frequency) metrics.
|
List<WordBag> |
labeledSeries2WordBags(Map<String,List<double[]>> data,
Params params) |
double |
logAveTF(WordBag bag,
String term)
Compute TF (term frequency) metrics.
|
double |
logTF(WordBag bag,
String term)
Compute TF (term frequency) metrics.
|
double |
magnitude(double[] vector)
Compute the magnitude of the vector.
|
double |
magnitude(Double[] vector)
Compute the magnitude of the vector.
|
double |
normalizedTF(WordBag bag,
String term)
Compute TF (term frequency) metrics.
|
HashMap<String,Double> |
normalizeToUnitVector(HashMap<String,Double> vector)
Normalize the vector to the norm of 1.
|
HashMap<String,HashMap<String,Double>> |
normalizeToUnitVectors(HashMap<String,HashMap<String,Double>> data)
Computes a cosine normalization of TFIDF statistics.
|
WordBag |
seriesToWordBag(String label,
double[] ts,
Params params)
Converts time series to a word bag.
|
String |
tfidfToTable(HashMap<String,HashMap<String,Double>> tfidf) |
String |
wordBagToTable(WordBag bag) |
public WordBag seriesToWordBag(String label, double[] ts, Params params) throws net.seninp.jmotif.sax.SAXException
label - the wordbag label.
ts - timeseries.
params - parameters for SAX transform.
net.seninp.jmotif.sax.SAXException - if error occurs.
public List<WordBag> labeledSeries2WordBags(Map<String,List<double[]>> data, Params params) throws net.seninp.jmotif.sax.SAXException
net.seninp.jmotif.sax.SAXException
public HashMap<String,HashMap<String,Double>> computeTFIDF(Collection<WordBag> texts)
texts - The collection of text documents for which the statistics need to be computed.
public double logTF(WordBag bag, String term)
bag - The words bag.
term - The term.
public double normalizedTF(WordBag bag, String term)
bag - The words bag.
term - The term.
public double augmentedTF(WordBag bag, String term)
bag - The words bag.
term - The term.
public double logAveTF(WordBag bag, String term)
bag - The words bag.
term - The term.
public int df(HashMap<String,WordBag> bags, String string)
bags - The word bags collection.
string - The string term.
public double idf(HashMap<String,WordBag> bags, String string)
bags - The bags of words collection.
string - The string (term).
public HashMap<String,Double> normalizeToUnitVector(HashMap<String,Double> vector)
vector - the vector.
public HashMap<String,HashMap<String,Double>> normalizeToUnitVectors(HashMap<String,HashMap<String,Double>> data)
data - The data.
public double cosineDistance(HashMap<String,Double> map1, HashMap<String,Double> map2)
map1 - The data vector 1.
map2 - The data vector 2.
public double cosineSimilarity(WordBag testSample, HashMap<String,Double> weightVector)
public CosineDistanceMatrix getCosineDistanceMatrix(HashMap<String,HashMap<String,Double>> tfidf)
public double magnitude(double[] vector)
vector - The vector.
public double magnitude(Double[] vector)
vector - The vector.
public double cosineSimilarityInstrumented(WordBag testSample, HashMap<String,Double> weightVector, HashMap<String,Double> insight)
public double dotProduct(double[] vector1, double[] vector2)
vector1 - The vector 1.
vector2 - The vector 2.
public double dotProduct(Double[] vector1, Double[] vector2)
vector1 - The vector 1.
vector2 - The vector 2.
public int classify(String classKey, double[] series, HashMap<String,HashMap<String,Double>> tfidf, Params params) throws net.seninp.jmotif.sax.SAXException
net.seninp.jmotif.sax.SAXException
public int classify(String trueClassLabel, WordBag test, HashMap<String,HashMap<String,Double>> tfidf)
Copyright © 2021. All rights reserved.