package org.apache.mahout.df.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.classifier.ClassifierResult;
import org.apache.mahout.classifier.ResultAnalyzer;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.df.DFUtils;
import org.apache.mahout.df.DecisionForest;
import org.apache.mahout.df.data.DataConverter;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Instance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/df/mapreduce/Classifier.class */
public class Classifier {
    private static final Logger log = LoggerFactory.getLogger(Classifier.class);
    private final Path forestPath;
    private final Path inputPath;
    private final Path datasetPath;
    private final Configuration conf;
    private final ResultAnalyzer analyzer;
    private final Dataset dataset;
    private final Path outputPath;
    private final Path mappersOutputPath;

    /* loaded from: input_file:org/apache/mahout/df/mapreduce/Classifier$CMapper.class */
    public static class CMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        private DataConverter converter;
        private DecisionForest forest;
        private final Random rng = RandomUtils.getRandom();
        private boolean first = true;
        private final Text lvalue = new Text();

        protected void setup(Mapper<LongWritable, Text, LongWritable, Text>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            Configuration configuration = context.getConfiguration();
            URI[] cacheFiles = DistributedCache.getCacheFiles(configuration);
            if (cacheFiles == null || cacheFiles.length < 2) {
                throw new IOException("not enough paths in the DistributedCache");
            }
            this.converter = new DataConverter(Dataset.load(configuration, new Path(cacheFiles[0].getPath())));
            this.forest = DecisionForest.load(configuration, new Path(cacheFiles[1].getPath()));
            if (this.forest == null) {
                throw new InterruptedException("DecisionForest not found!");
            }
        }

        protected void map(LongWritable longWritable, Text text, Mapper<LongWritable, Text, LongWritable, Text>.Context context) throws IOException, InterruptedException {
            if (this.first) {
                this.lvalue.set(context.getInputSplit().getPath().getName());
                context.write(longWritable, this.lvalue);
                this.first = false;
            }
            String text2 = text.toString();
            if (text2.isEmpty()) {
                return;
            }
            int classify = this.forest.classify(this.rng, this.converter.convert(0, text2));
            longWritable.set(r0.getLabel());
            this.lvalue.set(Integer.toString(classify));
            context.write(longWritable, this.lvalue);
        }

        protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((LongWritable) obj, (Text) obj2, (Mapper<LongWritable, Text, LongWritable, Text>.Context) context);
        }
    }

    /* loaded from: input_file:org/apache/mahout/df/mapreduce/Classifier$CTextInputFormat.class */
    public static class CTextInputFormat extends TextInputFormat {
        protected boolean isSplitable(JobContext jobContext, Path path) {
            return false;
        }
    }

    /**
     * Returns the analyzer that accumulates classification results.
     *
     * @return the analyzer, or {@code null} when this classifier was built with analysis disabled
     */
    public ResultAnalyzer getAnalyzer() {
        return analyzer;
    }

    public Classifier(Path path, Path path2, Path path3, Path path4, Configuration configuration, boolean z) throws IOException {
        this.forestPath = path;
        this.inputPath = path2;
        this.datasetPath = path3;
        this.outputPath = path4;
        this.conf = configuration;
        if (z) {
            this.dataset = Dataset.load(configuration, path3);
            this.analyzer = new ResultAnalyzer(Arrays.asList(this.dataset.labels()), "unknown");
        } else {
            this.dataset = null;
            this.analyzer = null;
        }
        this.mappersOutputPath = new Path(path4, "mappers");
    }

    /**
     * Sets up the map-only classification job: input/output locations and formats,
     * the mapper class and the output key/value types.
     *
     * @param job the job to configure
     * @throws IOException if the input or output paths cannot be set
     */
    private void configureJob(Job job) throws IOException {
        job.setJarByClass(Classifier.class);

        // Where to read from and where the mappers write to.
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, mappersOutputPath);

        // Record types emitted by the mappers.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Map-only: no reducers are scheduled.
        job.setMapperClass(CMapper.class);
        job.setNumReduceTasks(0);

        job.setInputFormatClass(CTextInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
    }

    /**
     * Runs the classification job and post-processes its output.
     *
     * <p>The dataset and the decision forest are registered in the DistributedCache
     * (in that order) before the job is created. When the job succeeds, the mapper
     * output is parsed and the intermediate mapper output directory is deleted.
     * When the job fails, an error is logged and the method returns normally.
     *
     * @throws IOException if the output path already exists or file access fails
     * @throws ClassNotFoundException propagated from job execution
     * @throws InterruptedException propagated from job execution
     */
    public void run() throws IOException, ClassNotFoundException, InterruptedException {
        // Resolve the file system from the path itself rather than the configured default,
        // so an output path on another file system is checked where it actually lives.
        FileSystem outputFs = this.outputPath.getFileSystem(this.conf);
        if (outputFs.exists(this.outputPath)) {
            throw new IOException("Output path already exists : " + this.outputPath);
        }

        log.info("Adding the dataset to the DistributedCache");
        DistributedCache.addCacheFile(this.datasetPath.toUri(), this.conf);
        log.info("Adding the decision forest to the DistributedCache");
        DistributedCache.addCacheFile(this.forestPath.toUri(), this.conf);

        Job job = new Job(this.conf, "decision forest classifier");
        log.info("Configuring the job...");
        configureJob(job);

        log.info("Running the job...");
        if (!job.waitForCompletion(true)) {
            log.error("Job failed!");
            return;
        }

        parseOutput(job);
        HadoopUtil.delete(this.conf, this.mappersOutputPath);
    }

    /**
     * Reads every mapper output file and writes the predictions to a per-input
     * {@code .out} file under the output path.
     *
     * <p>For each mapper output file, the value of the first record is used as the
     * name of the output file. Each following record's value is written followed by
     * a newline and, when an analyzer is present, the record's key and value are
     * both translated to labels through the dataset and fed to the analyzer.
     *
     * @param jobContext the finished job, used for its configuration
     * @throws IOException if reading the mapper output or writing the results fails
     */
    private void parseOutput(JobContext jobContext) throws IOException {
        Configuration configuration = jobContext.getConfiguration();
        FileSystem fileSystem = this.mappersOutputPath.getFileSystem(configuration);

        for (Path path : DFUtils.listOutputFiles(fileSystem, this.mappersOutputPath)) {
            FSDataOutputStream out = null;
            try {
                for (Pair<LongWritable, Text> record
                        : new SequenceFileIterable<LongWritable, Text>(path, true, configuration)) {
                    String value = record.getSecond().toString();
                    if (out == null) {
                        // First record: its value names the output file.
                        out = fileSystem.create(new Path(this.outputPath, value).suffix(".out"));
                        continue;
                    }

                    // Note: writeChars/writeChar emit two bytes per character.
                    out.writeChars(value);
                    out.writeChar('\n');

                    if (this.analyzer != null) {
                        int labelCode = (int) record.getFirst().get();
                        int predictedCode = Integer.parseInt(value);
                        this.analyzer.addInstance(
                            this.dataset.getLabel(labelCode),
                            new ClassifierResult(this.dataset.getLabel(predictedCode), 1.0d));
                    }
                }
            } finally {
                // The stream stays null when the file holds no records or reading failed
                // before the first record; closing unconditionally would throw a
                // NullPointerException and hide the original failure.
                if (out != null) {
                    out.close();
                }
            }
        }
    }
}
