package uk.ac.shef.dcs.jate.feature;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.search.SolrIndexSearcher;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.JATEProperties;
import uk.ac.shef.dcs.jate.util.SolrUtil;

/* loaded from: input_file:uk/ac/shef/dcs/jate/feature/AbstractFeatureBuilder.class */
/**
 * Base class for feature builders that read their input from a Solr index.
 *
 * <p>Holds the {@link SolrIndexSearcher} and {@link JATEProperties} shared by
 * concrete builders and offers helpers for collecting the distinct entries of
 * the n-gram and candidate-term fields named by the properties.
 */
public abstract class AbstractFeatureBuilder {
    /** Searcher used to read term vectors from the index. */
    protected SolrIndexSearcher solrIndexSearcher;
    /** Configuration supplying the Solr field names read by this builder. */
    protected JATEProperties properties;
    // NOTE(review): the two thresholds are not used in this class; presumably
    // subclasses use them to size sequential work units - confirm against them.
    protected static final int MIN_SEQUENTIAL_THRESHOLD = 100;
    protected static final int MAX_SEQUENTIAL_THRESHOLD = 10000;

    /**
     * Creates a builder bound to the given searcher and configuration.
     *
     * @param solrIndexSearcher searcher over the index to read from
     * @param jATEProperties    configuration providing the Solr field names
     */
    public AbstractFeatureBuilder(SolrIndexSearcher solrIndexSearcher, JATEProperties jATEProperties) {
        this.solrIndexSearcher = solrIndexSearcher;
        this.properties = jATEProperties;
    }

    /**
     * Builds the feature this builder is responsible for.
     *
     * @return the built feature
     * @throws JATEException if the feature cannot be built
     */
    public abstract AbstractFeature build() throws JATEException;

    /**
     * Collects the distinct single-token entries of the n-gram info field.
     *
     * <p>Empty entries and entries containing a space character are skipped.
     *
     * @return the non-empty set of entries that contain no space
     * @throws JATEException if the field yields no such entry, or if the term
     *                       vector lookup fails
     * @throws IOException   if reading the index fails
     */
    public Set<String> getUniqueWords() throws JATEException, IOException {
        String ngramField = this.properties.getSolrFieldNameJATENGramInfo();
        TermsEnum termsEnum = SolrUtil.getTermVector(ngramField, this.solrIndexSearcher).iterator();
        Set<String> words = new HashSet<>();
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            if (term.length == 0) {
                continue;
            }
            String text = term.utf8ToString();
            // A space marks a multi-token n-gram; only single tokens are wanted here.
            if (!text.contains(" ")) {
                words.add(text);
            }
        }
        if (words.isEmpty()) {
            throw new JATEException("MWEMetadata are required on 'Words', however there are no single-token lexical units in the " + this.properties.getSolrFieldNameJATENGramInfo() + " field. Check to see if your analyzer pipeline outputs uni-grams");
        }
        return words;
    }

    /**
     * Collects the distinct non-empty entries of the candidate-term field.
     *
     * <p>Unlike {@link #getUniqueWords()}, an empty result is returned as is
     * rather than reported as an error.
     *
     * @return the set of non-empty entries, possibly empty
     * @throws JATEException if the term vector lookup fails
     * @throws IOException   if reading the index fails
     */
    public Set<String> getUniqueTerms() throws JATEException, IOException {
        String termField = this.properties.getSolrFieldNameJATECTerms();
        TermsEnum termsEnum = SolrUtil.getTermVector(termField, this.solrIndexSearcher).iterator();
        Set<String> terms = new HashSet<>();
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            if (term.length != 0) {
                terms.add(term.utf8ToString());
            }
        }
        return terms;
    }
}
