package org.apache.mahout.clustering.kmeans;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.ClusterObservations;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/clustering/kmeans/KMeansDriver.class */
public class KMeansDriver extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(KMeansDriver.class);

    /**
     * Command-line entry point. Hands the arguments to Hadoop's {@link ToolRunner} together with
     * a fresh {@link Configuration} and a new driver instance.
     *
     * <p>The status code returned by the tool run is not inspected here.
     *
     * @param strArr the raw command-line arguments
     * @throws Exception whatever the tool run propagates
     */
    public static void main(String[] strArr) throws Exception {
        Configuration freshConf = new Configuration();
        KMeansDriver driver = new KMeansDriver();
        ToolRunner.run(freshConf, driver, strArr);
    }

    /**
     * Parses the command-line options, prepares the inputs and launches the k-means run.
     *
     * <p>Steps, in order: register the supported options, parse the arguments, resolve the
     * input / initial-clusters / output paths, pick the distance measure (falling back to
     * {@link SquaredEuclideanDistanceMeasure} when none is given), optionally delete the output
     * path when the overwrite option is present, optionally seed the initial clusters through
     * {@code RandomSeedGenerator.buildRandom} when a cluster count is given, and finally delegate
     * to the static {@code run(Configuration, ...)} overload.
     *
     * @param strArr the command-line arguments
     * @return {@code -1} when argument parsing yields {@code null}, otherwise {@code 0}
     * @throws Exception on option-parsing, class-loading, I/O or job failures
     */
    public int run(String[] strArr) throws Exception {
        addInputOption();
        addOutputOption();
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(DefaultOptionCreator.clustersInOption().withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  If k is also specified, then a random set of vectors will be selected and written out to this path first").create());
        addOption(DefaultOptionCreator.numClustersOption().withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters input path.").create());
        addOption(DefaultOptionCreator.convergenceOption().create());
        addOption(DefaultOptionCreator.maxIterationsOption().create());
        addOption(DefaultOptionCreator.overwriteOption().create());
        addOption(DefaultOptionCreator.clusteringOption().create());
        addOption(DefaultOptionCreator.methodOption().create());
        if (parseArguments(strArr) == null) {
            return -1;
        }

        Path input = getInputPath();
        Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
        Path output = getOutputPath();
        String measureClassName = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
        if (measureClassName == null) {
            measureClassName = SquaredEuclideanDistanceMeasure.class.getName();
        }
        double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
        int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));

        // Make sure a Configuration exists BEFORE its first use below. Previously this guard sat
        // at the very end, after getConf() had already been handed to HadoopUtil.delete and
        // RandomSeedGenerator.buildRandom.
        if (getConf() == null) {
            setConf(new Configuration());
        }

        if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
            HadoopUtil.delete(getConf(), output);
        }

        // asSubclass already narrows the type, so no extra cast is needed on the new instance.
        ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
        DistanceMeasure measure = contextLoader.loadClass(measureClassName).asSubclass(DistanceMeasure.class).newInstance();

        if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
            int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
            // buildRandom returns the path that is then used as the initial clusters location.
            clustersIn = RandomSeedGenerator.buildRandom(getConf(), input, clustersIn, k, measure);
        }

        boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
        boolean runSequential = getOption("method").equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
        run(getConf(), input, clustersIn, output, measure, convergenceDelta, maxIterations, runClustering, runSequential);
        return 0;
    }

    /**
     * Builds the clusters and, optionally, assigns the input points to them.
     *
     * @param configuration   the Hadoop configuration to use
     * @param path            the input path
     * @param path2           the path of the initial clusters
     * @param path3           the output path; clustered points go to its {@code clusteredPoints} child
     * @param distanceMeasure the distance measure to use
     * @param d               the convergence delta (passed on in its {@code Double.toString} form)
     * @param i               the maximum number of iterations
     * @param z               when {@code true}, {@code clusterData} is run after the clusters are built
     * @param z2              forwarded to {@code buildClusters} and {@code clusterData}, where it
     *                        selects the sequential implementation instead of the MapReduce one
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if a job is interrupted or reported as failed
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static void run(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, double d, int i, boolean z, boolean z2) throws IOException, InterruptedException, ClassNotFoundException {
        String delta = Double.toString(d);
        if (log.isInfoEnabled()) {
            log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[]{path, path2, path3, distanceMeasure.getClass().getName()});
            // The format string used to carry a fourth "num Reduce Tasks: {}" placeholder with no
            // matching argument, which shifted the vector class name under the wrong label.
            log.info("convergence: {} max Iterations: {} Input Vectors: {}", new Object[]{Double.valueOf(d), Integer.valueOf(i), VectorWritable.class.getName()});
        }
        Path finalClusters = buildClusters(configuration, path, path2, path3, distanceMeasure, i, delta, z2);
        if (z) {
            log.info("Clustering data");
            clusterData(configuration, path, finalClusters, new Path(path3, "clusteredPoints"), distanceMeasure, delta, z2);
        }
    }

    /**
     * Convenience overload that performs the same run with a newly created {@link Configuration}.
     *
     * @param path            the input path
     * @param path2           the path of the initial clusters
     * @param path3           the output path
     * @param distanceMeasure the distance measure to use
     * @param d               the convergence delta
     * @param i               the maximum number of iterations
     * @param z               whether to cluster the input points after building the clusters
     * @param z2              whether to use the sequential implementation
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if a job is interrupted or reported as failed
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static void run(Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, double d, int i, boolean z, boolean z2) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration defaultConf = new Configuration();
        run(defaultConf, path, path2, path3, distanceMeasure, d, i, z, z2);
    }

    /**
     * Iterates over the input to produce the final clusters, dispatching to either the
     * sequential or the MapReduce implementation.
     *
     * @param configuration   the Hadoop configuration to use
     * @param path            the input path
     * @param path2           the path of the initial clusters
     * @param path3           the output path under which per-iteration cluster directories are written
     * @param distanceMeasure the distance measure to use
     * @param i               the maximum number of iterations
     * @param str             the convergence delta, as a string
     * @param z               {@code true} for the sequential implementation, {@code false} for MapReduce
     * @return the path returned by the chosen implementation
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if a job is interrupted or reported as failed
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static Path buildClusters(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, int i, String str, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        if (z) {
            return buildClustersSeq(configuration, path, path2, path3, distanceMeasure, i, str);
        }
        return buildClustersMR(configuration, path, path2, path3, distanceMeasure, i, str);
    }

    /**
     * In-process k-means: repeatedly assigns every input vector to its nearest cluster, tests
     * for convergence and writes the clusters of each iteration to
     * {@code <path3>/<CLUSTERS_DIR><iteration>/part-r-00000}.
     *
     * @param configuration   the Hadoop configuration to use
     * @param path            the input path holding the vectors
     * @param path2           the path of the initial clusters
     * @param path3           the output path
     * @param distanceMeasure the distance measure handed to the clusterer
     * @param i               the maximum number of iterations
     * @param str             the convergence delta, as a string parseable by {@link Double#parseDouble}
     * @return the directory of the last iteration written, or {@code path2} if no iteration ran
     * @throws IOException           on I/O failure
     * @throws IllegalStateException if no initial clusters could be loaded
     */
    private static Path buildClustersSeq(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, int i, String str) throws IOException {
        KMeansClusterer clusterer = new KMeansClusterer(distanceMeasure);
        ArrayList<Cluster> clusters = new ArrayList<Cluster>();
        KMeansUtil.configureWithClusterInfo(configuration, path2, clusters);
        if (clusters.isEmpty()) {
            throw new IllegalStateException("Clusters is empty!");
        }

        // Loop-invariant values: parse the delta and resolve the file system once, not per iteration.
        double convergenceDelta = Double.parseDouble(str);
        // The writer below targets the output directory, so the file system is resolved from the
        // output path (as runIteration does), not from the input path.
        FileSystem outputFs = FileSystem.get(path3.toUri(), configuration);

        Path latestClusters = path2;
        boolean converged = false;
        for (int iteration = 1; !converged && iteration <= i; iteration++) {
            log.info("K-Means Iteration: {}", Integer.valueOf(iteration));
            for (VectorWritable point : new SequenceFileDirValueIterable<VectorWritable>(path, PathType.LIST, PathFilters.logsCRCFilter(), configuration)) {
                clusterer.addPointToNearestCluster(point.get(), clusters);
            }
            converged = clusterer.testConvergence(clusters, convergenceDelta);

            Path iterationDir = new Path(path3, org.apache.mahout.clustering.Cluster.CLUSTERS_DIR + iteration);
            SequenceFile.Writer writer = new SequenceFile.Writer(outputFs, configuration, new Path(iterationDir, "part-r-00000"), Text.class, Cluster.class);
            try {
                for (Cluster cluster : clusters) {
                    log.debug("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}", new Object[]{Integer.valueOf(cluster.getId()), AbstractCluster.formatVector(cluster.getCenter(), null), Long.valueOf(cluster.getNumPoints()), AbstractCluster.formatVector(cluster.getRadius(), null), iterationDir.getName()});
                    writer.append(new Text(cluster.getIdentifier()), cluster);
                }
                // Only advance once every cluster of this iteration has been appended.
                latestClusters = iterationDir;
            } finally {
                writer.close();
            }
        }
        return latestClusters;
    }

    /**
     * MapReduce k-means: runs one job per iteration until an iteration reports convergence or
     * the iteration limit is reached. Each iteration reads the clusters written by the previous
     * one and writes to {@code <path3>/<CLUSTERS_DIR><iteration>}.
     *
     * @param configuration   the Hadoop configuration to use
     * @param path            the input path holding the vectors
     * @param path2           the path of the initial clusters
     * @param path3           the output path
     * @param distanceMeasure the distance measure; its class name is passed to each iteration
     * @param i               the maximum number of iterations
     * @param str             the convergence delta, as a string
     * @return the output directory of the last iteration run, or {@code path2} if none ran
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if a job is interrupted or reported as failed
     * @throws ClassNotFoundException if a job class cannot be found
     */
    private static Path buildClustersMR(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, int i, String str) throws IOException, InterruptedException, ClassNotFoundException {
        Path currentClusters = path2;
        int iteration = 1;
        boolean converged = false;
        while (!converged && iteration <= i) {
            log.info("K-Means Iteration {}", Integer.valueOf(iteration));
            Path nextClusters = new Path(path3, org.apache.mahout.clustering.Cluster.CLUSTERS_DIR + iteration);
            String measureName = distanceMeasure.getClass().getName();
            converged = runIteration(configuration, path, currentClusters, nextClusters, measureName, str);
            // The output of this pass is the input of the next one.
            currentClusters = nextClusters;
            iteration++;
        }
        return currentClusters;
    }

    /**
     * Runs a single k-means MapReduce iteration and reports whether its output clusters have all
     * converged.
     *
     * <p>The clusters path, distance measure class name and convergence delta are stored in
     * {@code configuration} before the job is created. Any existing content at the output path
     * is deleted right before the job is submitted.
     *
     * @param configuration the Hadoop configuration; three k-means keys are set on it
     * @param path          the input path holding the vectors
     * @param path2         the path of the clusters to start this iteration from
     * @param path3         the output path of this iteration
     * @param str           the distance measure class name
     * @param str2          the convergence delta, as a string
     * @return {@code true} if every cluster written by the job is converged
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted, or if it completes unsuccessfully
     * @throws ClassNotFoundException if a job class cannot be found
     */
    private static boolean runIteration(Configuration configuration, Path path, Path path2, Path path3, String str, String str2) throws IOException, InterruptedException, ClassNotFoundException {
        // These settings must be in place before the Job is constructed from the configuration.
        configuration.set("org.apache.mahout.clustering.kmeans.path", path2.toString());
        configuration.set("org.apache.mahout.clustering.kmeans.measure", str);
        configuration.set("org.apache.mahout.clustering.kmeans.convergence", str2);

        Job iterationJob = new Job(configuration, "KMeans Driver running runIteration over clustersIn: " + path2);

        // Key/value types.
        iterationJob.setMapOutputKeyClass(Text.class);
        iterationJob.setMapOutputValueClass(ClusterObservations.class);
        iterationJob.setOutputKeyClass(Text.class);
        iterationJob.setOutputValueClass(Cluster.class);

        // I/O formats.
        iterationJob.setInputFormatClass(SequenceFileInputFormat.class);
        iterationJob.setOutputFormatClass(SequenceFileOutputFormat.class);

        // Processing stages.
        iterationJob.setMapperClass(KMeansMapper.class);
        iterationJob.setCombinerClass(KMeansCombiner.class);
        iterationJob.setReducerClass(KMeansReducer.class);

        FileInputFormat.addInputPath(iterationJob, path);
        FileOutputFormat.setOutputPath(iterationJob, path3);
        iterationJob.setJarByClass(KMeansDriver.class);

        HadoopUtil.delete(configuration, path3);
        boolean succeeded = iterationJob.waitForCompletion(true);
        if (!succeeded) {
            throw new InterruptedException("K-Means Iteration failed processing " + path2);
        }
        FileSystem outputFs = FileSystem.get(path3.toUri(), configuration);
        return isConverged(path3, configuration, outputFs);
    }

    /**
     * Checks whether every cluster stored in the part files under {@code path} is converged.
     *
     * @param path          the directory whose files matching {@code PathFilters.partFilter()} are scanned
     * @param configuration the Hadoop configuration used to read the files
     * @param fileSystem    the file system used to list {@code path}
     * @return {@code false} as soon as one non-converged cluster is found, {@code true} otherwise
     * @throws IOException on I/O failure
     */
    private static boolean isConverged(Path path, Configuration configuration, FileSystem fileSystem) throws IOException {
        for (FileStatus partFile : fileSystem.listStatus(path, PathFilters.partFilter())) {
            SequenceFileValueIterator<Cluster> clusters = new SequenceFileValueIterator<Cluster>(partFile.getPath(), true, configuration);
            boolean fullyRead = false;
            try {
                while (clusters.hasNext()) {
                    if (!clusters.next().isConverged()) {
                        return false;
                    }
                }
                fullyRead = true;
            } finally {
                // Close on early return and on exceptions so the underlying reader is not leaked.
                // NOTE(review): a fully consumed iterator is left alone, exactly as before;
                // presumably it releases its reader on exhaustion - confirm against
                // SequenceFileValueIterator.
                if (!fullyRead) {
                    clusters.close();
                }
            }
        }
        return true;
    }

    /**
     * Assigns the input points to the given clusters, using either the sequential or the
     * MapReduce implementation.
     *
     * @param configuration   the Hadoop configuration to use
     * @param path            the input path holding the vectors
     * @param path2           the path of the clusters to assign points to
     * @param path3           the output path for the clustered points
     * @param distanceMeasure the distance measure to use
     * @param str             the convergence delta, as a string (used by the MapReduce variant only)
     * @param z               {@code true} for the sequential implementation, {@code false} for MapReduce
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted or reported as failed
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static void clusterData(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, String str, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        if (log.isInfoEnabled()) {
            log.info("Running Clustering");
            Object[] locations = {path, path2, path3, distanceMeasure};
            log.info("Input: {} Clusters In: {} Out: {} Distance: {}", locations);
            log.info("convergence: {} Input Vectors: {}", str, VectorWritable.class.getName());
        }
        if (!z) {
            clusterDataMR(configuration, path, path2, path3, distanceMeasure, str);
            return;
        }
        clusterDataSeq(configuration, path, path2, path3, distanceMeasure);
    }

    /**
     * In-process point assignment: loads the clusters from {@code path2}, then, for every input
     * file under {@code path}, emits each vector to its nearest cluster into an output file
     * {@code <path3>/part-m-<n>}, where {@code n} counts the input files from 0.
     *
     * @param configuration   the Hadoop configuration to use
     * @param path            the input path holding the vectors
     * @param path2           the path of the clusters to assign points to
     * @param path3           the output path for the clustered points
     * @param distanceMeasure the distance measure handed to the clusterer
     * @throws IOException           on I/O failure
     * @throws IllegalStateException if no clusters could be loaded
     */
    private static void clusterDataSeq(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure) throws IOException {
        KMeansClusterer clusterer = new KMeansClusterer(distanceMeasure);
        ArrayList<Cluster> clusters = new ArrayList<Cluster>();
        KMeansUtil.configureWithClusterInfo(configuration, path2, clusters);
        if (clusters.isEmpty()) {
            throw new IllegalStateException("Clusters is empty!");
        }

        // Input files are listed on the input path's file system; results are written on the
        // output path's file system, which need not be the same one.
        FileSystem inputFs = FileSystem.get(path.toUri(), configuration);
        FileSystem outputFs = FileSystem.get(path3.toUri(), configuration);

        // One output part per input file. Previously every input file was written to the same
        // "part-m-0", so each file overwrote the results of the one before it.
        int part = 0;
        for (FileStatus inputFile : inputFs.listStatus(path, PathFilters.logsCRCFilter())) {
            Path partPath = new Path(path3, "part-m-" + part);
            part++;
            SequenceFile.Writer writer = new SequenceFile.Writer(outputFs, configuration, partPath, IntWritable.class, WeightedVectorWritable.class);
            try {
                for (VectorWritable point : new SequenceFileValueIterable<VectorWritable>(inputFile.getPath(), configuration)) {
                    clusterer.emitPointToNearestCluster(point.get(), clusters, writer);
                }
            } finally {
                writer.close();
            }
        }
    }

    /**
     * MapReduce point assignment: runs a map-only job (zero reduce tasks) with
     * {@code KMeansClusterMapper} over the input and writes the results to {@code path3}.
     *
     * <p>The clusters path, distance measure class name and convergence delta are stored in
     * {@code configuration} before the job is created. Any existing content at the output path
     * is deleted before the output path is set on the job.
     *
     * @param configuration   the Hadoop configuration; three k-means keys are set on it
     * @param path            the input path holding the vectors
     * @param path2           the path of the clusters to assign points to
     * @param path3           the output path for the clustered points
     * @param distanceMeasure the distance measure; only its class name is used here
     * @param str             the convergence delta, as a string
     * @throws IOException            on I/O failure
     * @throws InterruptedException   if the job is interrupted, or if it completes unsuccessfully
     * @throws ClassNotFoundException if a job class cannot be found
     */
    private static void clusterDataMR(Configuration configuration, Path path, Path path2, Path path3, DistanceMeasure distanceMeasure, String str) throws IOException, InterruptedException, ClassNotFoundException {
        // These settings must be in place before the Job is constructed from the configuration.
        String measureName = distanceMeasure.getClass().getName();
        configuration.set("org.apache.mahout.clustering.kmeans.path", path2.toString());
        configuration.set("org.apache.mahout.clustering.kmeans.measure", measureName);
        configuration.set("org.apache.mahout.clustering.kmeans.convergence", str);

        Job clusteringJob = new Job(configuration, "KMeans Driver running clusterData over input: " + path);
        clusteringJob.setOutputKeyClass(IntWritable.class);
        clusteringJob.setOutputValueClass(WeightedVectorWritable.class);
        clusteringJob.setInputFormatClass(SequenceFileInputFormat.class);
        clusteringJob.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileInputFormat.setInputPaths(clusteringJob, path);
        HadoopUtil.delete(configuration, path3);
        FileOutputFormat.setOutputPath(clusteringJob, path3);

        // Map-only job: the mapper's output is the final output.
        clusteringJob.setMapperClass(KMeansClusterMapper.class);
        clusteringJob.setNumReduceTasks(0);
        clusteringJob.setJarByClass(KMeansDriver.class);

        boolean succeeded = clusteringJob.waitForCompletion(true);
        if (!succeeded) {
            throw new InterruptedException("K-Means Clustering failed processing " + path2);
        }
    }
}
