package org.apache.mahout.classifier;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import org.apache.axis.Constants;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.mahout.classifier.bayes.BayesClassifier;
import org.apache.mahout.classifier.bayes.BayesModel;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
import org.apache.mahout.classifier.cbayes.CBayesModel;
import org.apache.mahout.common.Model;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/mahout-core-0.1.jar:org/apache/mahout/classifier/Classify.class */
/**
 * Command-line entry point that loads a trained Bayes or Complementary Bayes model and
 * classifies a single document with it.
 *
 * <p>Options read by {@link #main(String[])}:
 * <ul>
 *   <li>{@code --path} / {@code -p} (required): base directory holding the trained model; the
 *       {@code trainer-weights}, {@code trainer-thetaNormalizer} and {@code trainer-tfIdf}
 *       sub-paths are resolved beneath it.</li>
 *   <li>{@code --classify} (required): file containing the document to classify.</li>
 *   <li>{@code --classifierType} / {@code -type} (required): {@code bayes} or {@code cbayes},
 *       case-insensitive.</li>
 *   <li>{@code --encoding} / {@code -e} (optional): charset of the document, default
 *       {@code UTF-8}.</li>
 *   <li>{@code --analyzer} / {@code -a} (optional): fully qualified {@link Analyzer} class name,
 *       default {@link StandardAnalyzer}.</li>
 *   <li>{@code --defaultCat} / {@code -d} (optional): category passed to the classifier as the
 *       default, {@code unknown} when omitted.</li>
 *   <li>{@code --gramSize} / {@code -ng} (optional): n-gram size, default {@code 1}.</li>
 * </ul>
 */
public final class Classify {
    private static final Logger log = LoggerFactory.getLogger(Classify.class);

    /** Static entry point only; never instantiated. */
    private Classify() {
    }

    /**
     * Parses the command line, loads the requested model, tokenizes the input document and logs
     * the category the classifier assigns to it.
     *
     * @param strArr command-line arguments (see the class description for the options)
     * @throws IOException if the file system, the model or the input document cannot be read
     * @throws ClassNotFoundException if the class named by {@code --analyzer} cannot be loaded
     * @throws IllegalAccessException if the analyzer's no-arg constructor is not accessible
     * @throws InstantiationException if the analyzer class cannot be instantiated
     * @throws OptionException if the command line cannot be parsed
     * @throws IllegalArgumentException if {@code --classifierType} is neither {@code bayes} nor
     *         {@code cbayes}
     * @throws NumberFormatException if {@code --gramSize} is not an integer
     */
    public static void main(String[] strArr) throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, OptionException {
        DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();

        // NOTE(review): the option name comes from an Axis constant, presumably the literal
        // "path" -- confirm, and consider replacing it with a local literal.
        DefaultOption pathOpt = singleValueOption(optionBuilder, argumentBuilder,
                Constants.MC_RELATIVE_PATH, "p", "The local file system path", true);
        DefaultOption classifyOpt = singleValueOption(optionBuilder, argumentBuilder,
                "classify", "", "The doc to classify", true);
        DefaultOption typeOpt = singleValueOption(optionBuilder, argumentBuilder,
                "classifierType", "type", "Type of classifier", true);
        // The remaining options all have fallbacks below, so they must not be mandatory;
        // otherwise the parser rejects the command line before a default can ever apply.
        DefaultOption encodingOpt = singleValueOption(optionBuilder, argumentBuilder,
                "encoding", "e", "The file encoding.  Default: UTF-8", false);
        DefaultOption analyzerOpt = singleValueOption(optionBuilder, argumentBuilder,
                "analyzer", "a", "The Analyzer to use", false);
        DefaultOption defaultCatOpt = singleValueOption(optionBuilder, argumentBuilder,
                "defaultCat", "d", "The default category", false);
        DefaultOption gramSizeOpt = singleValueOption(optionBuilder, argumentBuilder,
                "gramSize", "ng", "Size of the n-gram", false);

        Group options = new GroupBuilder().withName("Options")
                .withOption(pathOpt)
                .withOption(classifyOpt)
                .withOption(encodingOpt)
                .withOption(analyzerOpt)
                .withOption(defaultCatOpt)
                .withOption(gramSizeOpt)
                .withOption(typeOpt)
                .create();

        Parser parser = new Parser();
        parser.setGroup(options);
        CommandLine commandLine = parser.parse(strArr);

        // Locations of the individual model parts, all relative to the --path argument.
        JobConf jobConf = new JobConf(Classify.class);
        String modelBase = (String) commandLine.getValue(pathOpt);
        HashMap<String, Path> modelPaths = new HashMap<String, Path>();
        modelPaths.put("sigma_j", new Path(modelBase + "/trainer-weights/Sigma_j/part-*"));
        modelPaths.put("sigma_k", new Path(modelBase + "/trainer-weights/Sigma_k/part-*"));
        modelPaths.put("sigma_kSigma_j", new Path(modelBase + "/trainer-weights/Sigma_kSigma_j/part-*"));
        modelPaths.put("thetaNormalizer", new Path(modelBase + "/trainer-thetaNormalizer/part-*"));
        modelPaths.put("weight", new Path(modelBase + "/trainer-tfIdf/trainer-tfIdf/part-*"));

        FileSystem fileSystem = FileSystem.get(jobConf);
        log.info("Loading model from: {}", modelPaths);

        Model model = createModel((String) commandLine.getValue(typeOpt));
        BayesClassifier classifier = new BayesClassifier();
        SequenceFileModelReader.loadModel(model, fileSystem, modelPaths, jobConf);
        log.info("Done loading model: # labels: {}", model.getLabels().size());
        log.info("Done generating Model");

        String defaultCategory = commandLine.hasOption(defaultCatOpt)
                ? (String) commandLine.getValue(defaultCatOpt)
                : "unknown";
        File document = new File((String) commandLine.getValue(classifyOpt));
        String encoding = commandLine.hasOption(encodingOpt)
                ? (String) commandLine.getValue(encodingOpt)
                : "UTF-8";

        Analyzer analyzer;
        if (commandLine.hasOption(analyzerOpt)) {
            String analyzerClassName = (String) commandLine.getValue(analyzerOpt);
            analyzer = Class.forName(analyzerClassName).asSubclass(Analyzer.class).newInstance();
        } else {
            analyzer = new StandardAnalyzer();
        }

        int gramSize = commandLine.hasOption(gramSizeOpt)
                ? Integer.parseInt((String) commandLine.getValue(gramSizeOpt))
                : 1;

        log.info("Converting input document to proper format");
        String[] tokens = readDocument(document, encoding, analyzer);
        StringBuilder text = new StringBuilder();
        for (String token : tokens) {
            text.append(token).append(' ');
        }
        List<String> ngrams = Model.generateNGramsWithoutLabel(text.toString(), gramSize);
        log.info("Done converting");

        log.info("Classifying document: {}", document);
        log.info("Category for {} is {}", document,
                classifier.classify(model, ngrams.toArray(new String[ngrams.size()]), defaultCategory));
    }

    /**
     * Builds an option that takes exactly one argument value. The builder calls are issued in
     * the order long name, required flag, argument, description, short name.
     */
    private static DefaultOption singleValueOption(DefaultOptionBuilder optionBuilder,
                                                   ArgumentBuilder argumentBuilder,
                                                   String longName,
                                                   String shortName,
                                                   String description,
                                                   boolean required) {
        return optionBuilder.withLongName(longName)
                .withRequired(required)
                .withArgument(argumentBuilder.withName(longName).withMinimum(1).withMaximum(1).create())
                .withDescription(description)
                .withShortName(shortName)
                .create();
    }

    /** Creates the empty model matching the requested classifier type ("bayes" or "cbayes"). */
    private static Model createModel(String classifierType) {
        if (classifierType.equalsIgnoreCase("bayes")) {
            log.info("Testing Bayes Classifier");
            return new BayesModel();
        }
        if (classifierType.equalsIgnoreCase("cbayes")) {
            log.info("Testing Complementary Bayes Classifier");
            return new CBayesModel();
        }
        throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
    }

    /** Tokenizes the document with the given analyzer and always releases the file handle. */
    private static String[] readDocument(File document, String encoding, Analyzer analyzer) throws IOException {
        // Resolve the charset before opening the file so a bad encoding name cannot leak a stream.
        Charset charset = Charset.forName(encoding);
        InputStreamReader reader = new InputStreamReader(new FileInputStream(document), charset);
        try {
            return BayesFileFormatter.readerToDocument(analyzer, reader);
        } finally {
            // Closing the reader also closes the underlying FileInputStream.
            reader.close();
        }
    }
}
