package org.apache.mahout.clustering.canopy;

import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.classify.ClusterClassificationDriver;
import org.apache.mahout.clustering.classify.ClusterClassifier;
import org.apache.mahout.clustering.iterator.CanopyClusteringPolicy;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/clustering/canopy/CanopyDriver.class */
public class CanopyDriver extends AbstractJob {
    public static final String DEFAULT_CLUSTERED_POINTS_DIRECTORY = "clusteredPoints";
    private static final Logger log = LoggerFactory.getLogger(CanopyDriver.class);

    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new Configuration(), new CanopyDriver(), strArr);
    }

    public int run(String[] strArr) throws Exception {
        addInputOption();
        addOutputOption();
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(DefaultOptionCreator.t1Option().create());
        addOption(DefaultOptionCreator.t2Option().create());
        addOption(DefaultOptionCreator.t3Option().create());
        addOption(DefaultOptionCreator.t4Option().create());
        addOption(DefaultOptionCreator.clusterFilterOption().create());
        addOption(DefaultOptionCreator.overwriteOption().create());
        addOption(DefaultOptionCreator.clusteringOption().create());
        addOption(DefaultOptionCreator.methodOption().create());
        addOption(DefaultOptionCreator.outlierThresholdOption().create());
        if (parseArguments(strArr) == null) {
            return -1;
        }
        Path inputPath = getInputPath();
        Path outputPath = getOutputPath();
        Configuration conf = getConf();
        if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
            HadoopUtil.delete(conf, outputPath);
        }
        String option = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
        double parseDouble = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
        double parseDouble2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
        double d = parseDouble;
        if (hasOption(DefaultOptionCreator.T3_OPTION)) {
            d = Double.parseDouble(getOption(DefaultOptionCreator.T3_OPTION));
        }
        double d2 = parseDouble2;
        if (hasOption(DefaultOptionCreator.T4_OPTION)) {
            d2 = Double.parseDouble(getOption(DefaultOptionCreator.T4_OPTION));
        }
        int i = 0;
        if (hasOption(DefaultOptionCreator.CLUSTER_FILTER_OPTION)) {
            i = Integer.parseInt(getOption(DefaultOptionCreator.CLUSTER_FILTER_OPTION));
        }
        boolean hasOption = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
        boolean equalsIgnoreCase = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase("sequential");
        DistanceMeasure distanceMeasure = (DistanceMeasure) ClassUtils.instantiateAs(option, DistanceMeasure.class);
        double d3 = 0.0d;
        if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
            d3 = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
        }
        run(conf, inputPath, outputPath, distanceMeasure, parseDouble, parseDouble2, d, d2, i, hasOption, d3, equalsIgnoreCase);
        return 0;
    }

    public static void run(Configuration configuration, Path path, Path path2, DistanceMeasure distanceMeasure, double d, double d2, double d3, double d4, int i, boolean z, double d5, boolean z2) throws IOException, InterruptedException, ClassNotFoundException {
        Path buildClusters = buildClusters(configuration, path, path2, distanceMeasure, d, d2, d3, d4, i, z2);
        if (z) {
            clusterData(configuration, path, buildClusters, path2, d5, z2);
        }
    }

    public static void run(Configuration configuration, Path path, Path path2, DistanceMeasure distanceMeasure, double d, double d2, boolean z, double d3, boolean z2) throws IOException, InterruptedException, ClassNotFoundException {
        run(configuration, path, path2, distanceMeasure, d, d2, d, d2, 0, z, d3, z2);
    }

    public static void run(Path path, Path path2, DistanceMeasure distanceMeasure, double d, double d2, boolean z, double d3, boolean z2) throws IOException, InterruptedException, ClassNotFoundException {
        run(new Configuration(), path, path2, distanceMeasure, d, d2, z, d3, z2);
    }

    public static Path buildClusters(Configuration configuration, Path path, Path path2, DistanceMeasure distanceMeasure, double d, double d2, int i, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        return buildClusters(configuration, path, path2, distanceMeasure, d, d2, d, d2, i, z);
    }

    public static Path buildClusters(Configuration configuration, Path path, Path path2, DistanceMeasure distanceMeasure, double d, double d2, double d3, double d4, int i, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        log.info("Build Clusters Input: {} Out: {} Measure: {} t1: {} t2: {}", new Object[]{path, path2, distanceMeasure, Double.valueOf(d), Double.valueOf(d2)});
        return z ? buildClustersSeq(path, path2, distanceMeasure, d, d2, i) : buildClustersMR(configuration, path, path2, distanceMeasure, d, d2, d3, d4, i);
    }

    private static Path buildClustersSeq(Path path, Path path2, DistanceMeasure distanceMeasure, double d, double d2, int i) throws IOException {
        CanopyClusterer canopyClusterer = new CanopyClusterer(distanceMeasure, d, d2);
        ArrayList newArrayList = Lists.newArrayList();
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
        Iterator it = new SequenceFileDirValueIterable(path, PathType.LIST, PathFilters.logsCRCFilter(), configuration).iterator();
        while (it.hasNext()) {
            canopyClusterer.addPointToCanopies(((VectorWritable) it.next()).get(), newArrayList);
        }
        Path path3 = new Path(path2, "clusters-0-final");
        SequenceFile.Writer writer = new SequenceFile.Writer(fileSystem, configuration, new Path(path3, "part-r-00000"), Text.class, ClusterWritable.class);
        try {
            ClusterWritable clusterWritable = new ClusterWritable();
            for (Canopy canopy : newArrayList) {
                canopy.computeParameters();
                if (log.isDebugEnabled()) {
                    log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}", new Object[]{canopy.getIdentifier(), AbstractCluster.formatVector(canopy.getCenter(), null), Long.valueOf(canopy.getNumObservations()), AbstractCluster.formatVector(canopy.getRadius(), null)});
                }
                if (canopy.getNumObservations() > i) {
                    clusterWritable.setValue(canopy);
                    writer.append(new Text(canopy.getIdentifier()), clusterWritable);
                }
            }
            return path3;
        } finally {
            Closeables.close(writer, false);
        }
    }

    private static Path buildClustersMR(Configuration configuration, Path path, Path path2, DistanceMeasure distanceMeasure, double d, double d2, double d3, double d4, int i) throws IOException, InterruptedException, ClassNotFoundException {
        configuration.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, distanceMeasure.getClass().getName());
        configuration.set(CanopyConfigKeys.T1_KEY, String.valueOf(d));
        configuration.set(CanopyConfigKeys.T2_KEY, String.valueOf(d2));
        configuration.set(CanopyConfigKeys.T3_KEY, String.valueOf(d3));
        configuration.set(CanopyConfigKeys.T4_KEY, String.valueOf(d4));
        configuration.set(CanopyConfigKeys.CF_KEY, String.valueOf(i));
        Job job = new Job(configuration, "Canopy Driver running buildClusters over input: " + path);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(CanopyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(VectorWritable.class);
        job.setReducerClass(CanopyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ClusterWritable.class);
        job.setNumReduceTasks(1);
        job.setJarByClass(CanopyDriver.class);
        FileInputFormat.addInputPath(job, path);
        Path path3 = new Path(path2, "clusters-0-final");
        FileOutputFormat.setOutputPath(job, path3);
        if (job.waitForCompletion(true)) {
            return path3;
        }
        throw new InterruptedException("Canopy Job failed processing " + path);
    }

    private static void clusterData(Configuration configuration, Path path, Path path2, Path path3, double d, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), path2);
        ClusterClassificationDriver.run(configuration, path, path3, new Path(path3, "clusteredPoints"), d, true, z);
    }
}
