package org.apache.mahout.classifier;

import com.google.common.io.Files;
import java.io.File;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.List;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
import org.apache.mahout.cf.taste.impl.model.jdbc.GenericJDBCDataModel;
import org.apache.mahout.classifier.bayes.algorithm.BayesAlgorithm;
import org.apache.mahout.classifier.bayes.algorithm.CBayesAlgorithm;
import org.apache.mahout.classifier.bayes.common.BayesParameters;
import org.apache.mahout.classifier.bayes.datastore.InMemoryBayesDatastore;
import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
import org.apache.mahout.classifier.bayes.model.ClassifierContext;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
import org.apache.mahout.common.nlp.NGrams;
import org.apache.mahout.fpm.pfpgrowth.PFPGrowth;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/classifier/Classify.class */
public final class Classify {
    private static final Logger log = LoggerFactory.getLogger(Classify.class);

    private Classify() {
    }

    public static void main(String[] strArr) throws Exception {
        Algorithm cBayesAlgorithm;
        InMemoryBayesDatastore inMemoryBayesDatastore;
        DefaultOptionBuilder defaultOptionBuilder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();
        GroupBuilder groupBuilder = new GroupBuilder();
        DefaultOption create = defaultOptionBuilder.withLongName("path").withRequired(true).withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1).create()).withDescription("The local file system path").withShortName(FuzzyKMeansDriver.M_OPTION).create();
        DefaultOption create2 = defaultOptionBuilder.withLongName("classify").withRequired(true).withArgument(argumentBuilder.withName("classify").withMinimum(1).withMaximum(1).create()).withDescription("The doc to classify").withShortName("").create();
        DefaultOption create3 = defaultOptionBuilder.withLongName(PFPGrowth.ENCODING).withRequired(true).withArgument(argumentBuilder.withName(PFPGrowth.ENCODING).withMinimum(1).withMaximum(1).create()).withDescription("The file encoding.  Default: UTF-8").withShortName("e").create();
        DefaultOption create4 = defaultOptionBuilder.withLongName("analyzer").withRequired(true).withArgument(argumentBuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).withDescription("The Analyzer to use").withShortName("a").create();
        DefaultOption create5 = defaultOptionBuilder.withLongName("defaultCat").withRequired(true).withArgument(argumentBuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create()).withDescription("The default category").withShortName("d").create();
        DefaultOption create6 = defaultOptionBuilder.withLongName("gramSize").withRequired(true).withArgument(argumentBuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()).withDescription("Size of the n-gram").withShortName("ng").create();
        DefaultOption create7 = defaultOptionBuilder.withLongName("classifierType").withRequired(true).withArgument(argumentBuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()).withDescription("Type of classifier").withShortName("type").create();
        DefaultOption create8 = defaultOptionBuilder.withLongName(GenericJDBCDataModel.DATA_SOURCE_KEY).withRequired(true).withArgument(argumentBuilder.withName(GenericJDBCDataModel.DATA_SOURCE_KEY).withMinimum(1).withMaximum(1).create()).withDescription("Location of model: hdfs").withShortName("source").create();
        Group create9 = groupBuilder.withName("Options").withOption(create).withOption(create2).withOption(create3).withOption(create4).withOption(create5).withOption(create6).withOption(create7).withOption(create8).create();
        Parser parser = new Parser();
        parser.setGroup(create9);
        CommandLine parse = parser.parse(strArr);
        int parseInt = parse.hasOption(create6) ? Integer.parseInt((String) parse.getValue(create6)) : 1;
        BayesParameters bayesParameters = new BayesParameters();
        bayesParameters.setGramSize(parseInt);
        bayesParameters.setBasePath((String) parse.getValue(create));
        log.info("Loading model from: {}", bayesParameters.print());
        String str = (String) parse.getValue(create7);
        String str2 = (String) parse.getValue(create8);
        if (!"hdfs".equals(str2)) {
            throw new IllegalArgumentException("Unrecognized dataSource type: " + str2);
        }
        if ("bayes".equalsIgnoreCase(str)) {
            log.info("Using Bayes Classifier");
            cBayesAlgorithm = new BayesAlgorithm();
            inMemoryBayesDatastore = new InMemoryBayesDatastore(bayesParameters);
        } else {
            if (!"cbayes".equalsIgnoreCase(str)) {
                throw new IllegalArgumentException("Unrecognized classifier type: " + str);
            }
            log.info("Using Complementary Bayes Classifier");
            cBayesAlgorithm = new CBayesAlgorithm();
            inMemoryBayesDatastore = new InMemoryBayesDatastore(bayesParameters);
        }
        ClassifierContext classifierContext = new ClassifierContext(cBayesAlgorithm, inMemoryBayesDatastore);
        classifierContext.initialize();
        String str3 = parse.hasOption(create5) ? (String) parse.getValue(create5) : "unknown";
        File file = new File((String) parse.getValue(create2));
        String str4 = parse.hasOption(create3) ? (String) parse.getValue(create3) : "UTF-8";
        Analyzer analyzer = parse.hasOption(create4) ? (Analyzer) Class.forName((String) parse.getValue(create4)).asSubclass(Analyzer.class).newInstance() : null;
        if (analyzer == null) {
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
        }
        log.info("Converting input document to proper format");
        String[] readerToDocument = BayesFileFormatter.readerToDocument(analyzer, Files.newReader(file, Charset.forName(str4)));
        StringBuilder sb = new StringBuilder();
        for (String str5 : readerToDocument) {
            sb.append(str5).append(' ');
        }
        List<String> generateNGramsWithoutLabel = new NGrams(sb.toString(), parseInt).generateNGramsWithoutLabel();
        log.info("Done converting");
        log.info("Classifying document: {}", file);
        log.info("Category for {} is {}", file, classifierContext.classifyDocument((String[]) generateNGramsWithoutLabel.toArray(new String[generateNGramsWithoutLabel.size()]), str3));
    }
}
