package org.apache.mahout.clustering.kmeans;

import com.ibm.wsdl.Constants;
import java.io.IOException;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.matrix.SparseVector;
import org.apache.mahout.matrix.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/mahout-core-0.2.jar:org/apache/mahout/clustering/kmeans/KMeansDriver.class */
/**
 * Command-line and programmatic driver for the k-means clustering jobs.
 *
 * <p>{@link #runJob} repeatedly submits a map/combine/reduce job (one per iteration) that
 * writes its clusters to {@code <output>/clusters-<n>}, stopping once every cluster read back
 * from that directory reports itself converged or the iteration limit is hit. It then submits
 * a map-only job that writes to {@code <output>} + {@link #DEFAULT_OUTPUT_DIRECTORY}.
 *
 * <p>This class only holds static entry points and cannot be instantiated.
 */
public class KMeansDriver {
    /** Suffix appended to the output path to form the output directory of the final clustering job. */
    public static final String DEFAULT_OUTPUT_DIRECTORY = "/points";

    private static final Logger log = LoggerFactory.getLogger(KMeansDriver.class);

    // Job configuration keys set by both the iteration job and the final clustering job.
    // NOTE(review): presumably these are the keys the mapper/combiner/reducer classes read;
    // they must stay byte-identical to whatever those classes look up.
    private static final String CLUSTER_PATH_KEY = "org.apache.mahout.clustering.kmeans.path";
    private static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
    private static final String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.kmeans.convergence";

    private KMeansDriver() {
    }

    /**
     * Parses the command line and launches the k-means jobs.
     *
     * <p>Required options: {@code --input/-i}, {@code --clusters/-c}, {@code --output/-o}.
     * Optional: {@code --distance/-m} (default {@link SquaredEuclideanDistanceMeasure}),
     * {@code --convergence/-d} (default 0.5), {@code --max/-x} (default 20),
     * {@code --numReduce/-r} (default 2), {@code --vectorClass/-v} (default {@link SparseVector}),
     * the k option (seeds the clusters path via {@link RandomSeedGenerator} before running),
     * {@code --overwrite/-w} and {@code --help/-h}.
     *
     * <p>If the arguments cannot be parsed, the error is logged and the help text is printed;
     * no job is started.
     *
     * @param args the raw command-line arguments
     * @throws ClassNotFoundException if the class named by {@code --vectorClass} cannot be loaded
     * @throws ClassCastException if the class named by {@code --vectorClass} is not a {@link Vector}
     * @throws NumberFormatException if a numeric option value cannot be parsed
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
        DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();
        GroupBuilder groupBuilder = new GroupBuilder();

        // The "input"/"output" option names are spelled out here rather than borrowed from
        // com.ibm.wsdl.Constants (ELEM_INPUT / ELEM_OUTPUT), which is an unrelated WSDL library.
        DefaultOption inputOpt = optionBuilder.withLongName("input").withRequired(true)
                .withArgument(argumentBuilder.withName("input").withMinimum(1).withMaximum(1).create())
                .withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector")
                .withShortName("i").create();
        DefaultOption clustersOpt = optionBuilder.withLongName("clusters").withRequired(true)
                .withArgument(argumentBuilder.withName("clusters").withMinimum(1).withMaximum(1).create())
                .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  If k is also specified, then a random set of vectors will be selected and written out to this path first")
                .withShortName("c").create();
        DefaultOption kOpt = optionBuilder.withLongName(RandomSeedGenerator.K).withRequired(false)
                .withArgument(argumentBuilder.withName(RandomSeedGenerator.K).withMinimum(1).withMaximum(1).create())
                .withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.")
                .withShortName(RandomSeedGenerator.K).create();
        DefaultOption outputOpt = optionBuilder.withLongName("output").withRequired(true)
                .withArgument(argumentBuilder.withName("output").withMinimum(1).withMaximum(1).create())
                .withDescription("The Path to put the output in")
                .withShortName("o").create();
        DefaultOption overwriteOpt = optionBuilder.withLongName("overwrite").withRequired(false)
                .withDescription("If set, overwrite the output directory")
                .withShortName("w").create();
        DefaultOption measureOpt = optionBuilder.withLongName("distance").withRequired(false)
                .withArgument(argumentBuilder.withName("distance").withMinimum(1).withMaximum(1).create())
                .withDescription("The Distance Measure to use.  Default is SquaredEuclidean")
                .withShortName("m").create();
        DefaultOption convergenceOpt = optionBuilder.withLongName("convergence").withRequired(false)
                .withArgument(argumentBuilder.withName("convergence").withMinimum(1).withMaximum(1).create())
                .withDescription("The threshold below which the clusters are considered to be converged.  Default is 0.5")
                .withShortName("d").create();
        DefaultOption maxIterationsOpt = optionBuilder.withLongName("max").withRequired(false)
                .withArgument(argumentBuilder.withName("max").withMinimum(1).withMaximum(1).create())
                .withDescription("The maximum number of iterations to perform.  Default is 20")
                .withShortName("x").create();
        DefaultOption vectorClassOpt = optionBuilder.withLongName("vectorClass").withRequired(false)
                .withArgument(argumentBuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create())
                .withDescription("The Vector implementation class name.  Default is SparseVector.class")
                .withShortName("v").create();
        DefaultOption numReduceOpt = optionBuilder.withLongName("numReduce").withRequired(false)
                .withArgument(argumentBuilder.withName("numReduce").withMinimum(1).withMaximum(1).create())
                .withDescription("The number of reduce tasks")
                .withShortName("r").create();
        DefaultOption helpOpt = optionBuilder.withLongName("help")
                .withDescription("Print out help")
                .withShortName("h").create();

        Group options = groupBuilder.withName("Options")
                .withOption(inputOpt)
                .withOption(clustersOpt)
                .withOption(outputOpt)
                .withOption(measureOpt)
                .withOption(convergenceOpt)
                .withOption(maxIterationsOpt)
                .withOption(numReduceOpt)
                .withOption(kOpt)
                .withOption(vectorClassOpt)
                .withOption(overwriteOpt)
                .withOption(helpOpt)
                .create();

        try {
            Parser parser = new Parser();
            parser.setGroup(options);
            CommandLine cmdLine = parser.parse(args);
            if (cmdLine.hasOption(helpOpt)) {
                CommandLineUtil.printHelp(options);
                return;
            }

            String input = cmdLine.getValue(inputOpt).toString();
            String clusters = cmdLine.getValue(clustersOpt).toString();
            String output = cmdLine.getValue(outputOpt).toString();

            String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
            if (cmdLine.hasOption(measureOpt)) {
                measureClass = cmdLine.getValue(measureOpt).toString();
            }

            double convergenceDelta = 0.5;
            if (cmdLine.hasOption(convergenceOpt)) {
                convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceOpt).toString());
            }

            // asSubclass rejects a class that is not a Vector right here, instead of letting a
            // raw Class slip through unchecked and fail somewhere inside the submitted job.
            Class<? extends Vector> vectorClass = SparseVector.class;
            if (cmdLine.hasOption(vectorClassOpt)) {
                vectorClass = Class.forName(cmdLine.getValue(vectorClassOpt).toString()).asSubclass(Vector.class);
            }

            int maxIterations = 20;
            if (cmdLine.hasOption(maxIterationsOpt)) {
                maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
            }

            int numReduceTasks = 2;
            if (cmdLine.hasOption(numReduceOpt)) {
                numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceOpt).toString());
            }

            // Side effects start only after every option value has been parsed successfully.
            if (cmdLine.hasOption(overwriteOpt)) {
                HadoopUtil.overwriteOutput(output);
            }
            if (cmdLine.hasOption(kOpt)) {
                int k = Integer.parseInt(cmdLine.getValue(kOpt).toString());
                // The path returned by the seed generator replaces the user-supplied clusters path.
                clusters = RandomSeedGenerator.buildRandom(input, clusters, k).toString();
            }

            runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks, vectorClass);
        } catch (OptionException e) {
            log.error("Exception", e);
            CommandLineUtil.printHelp(options);
        }
    }

    /**
     * Runs up to {@code maxIterations} k-means iterations followed by the final clustering job.
     *
     * <p>Iteration {@code n} reads its clusters from the previous iteration's output (or from
     * {@code clustersIn} for the first one) and writes to {@code output + "/clusters-" + n}.
     * Iterating stops as soon as an iteration reports convergence. The final job then runs with
     * the most recent clusters path and writes to {@code output + DEFAULT_OUTPUT_DIRECTORY}.
     * With {@code maxIterations <= 0} no iteration runs and {@code clustersIn} is used directly.
     *
     * @param input path of the input vectors
     * @param clustersIn path of the initial clusters
     * @param output base output path
     * @param measureClass class name of the distance measure, passed to the jobs via configuration
     * @param convergenceDelta convergence threshold, passed to the jobs as a string
     * @param maxIterations upper bound on the number of iterations
     * @param numReduceTasks number of reduce tasks for each iteration job
     * @param vectorClass map output value class of the final clustering job
     */
    public static void runJob(String input, String clustersIn, String output, String measureClass,
                              double convergenceDelta, int maxIterations, int numReduceTasks,
                              Class<? extends Vector> vectorClass) {
        String delta = Double.toString(convergenceDelta);
        if (log.isInfoEnabled()) {
            log.info("Input: " + input + " Clusters In: " + clustersIn + " Out: " + output + " Distance: " + measureClass);
            log.info("convergence: " + convergenceDelta + " max Iterations: " + maxIterations + " num Reduce Tasks: " + numReduceTasks + " Input Vectors: " + vectorClass.getName());
        }

        boolean converged = false;
        for (int iteration = 0; !converged && iteration < maxIterations; iteration++) {
            log.info("Iteration {}", iteration);
            String clustersOut = output + "/clusters-" + iteration;
            converged = runIteration(input, clustersIn, clustersOut, measureClass, delta, numReduceTasks, iteration);
            // The clusters just written become the input of the next iteration / the final job.
            clustersIn = clustersOut;
        }

        log.info("Clustering ");
        runClustering(input, clustersIn, output + DEFAULT_OUTPUT_DIRECTORY, measureClass, delta, vectorClass);
    }

    /**
     * Configures and runs a single k-means iteration job, then checks its output for convergence.
     *
     * @param input path of the input vectors
     * @param clustersIn path of the clusters to start this iteration from
     * @param clustersOut output path of this iteration
     * @param measureClass class name of the distance measure
     * @param convergenceDelta convergence threshold as a string
     * @param numReduceTasks number of reduce tasks
     * @param iteration zero-based iteration number, stored under {@code Cluster.ITERATION_NUMBER}
     * @return {@code true} if every cluster written by the job is converged, or if an
     *         {@link IOException} occurred (it is logged as a warning and reported as
     *         "converged", which ends the iteration loop in {@link #runJob})
     */
    private static boolean runIteration(String input, String clustersIn, String clustersOut, String measureClass,
                                        String convergenceDelta, int numReduceTasks, int iteration) {
        JobConf conf = new JobConf(KMeansDriver.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(KMeansInfo.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Cluster.class);

        FileInputFormat.setInputPaths(conf, new Path(input));
        Path outPath = new Path(clustersOut);
        FileOutputFormat.setOutputPath(conf, outPath);

        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setMapperClass(KMeansMapper.class);
        conf.setCombinerClass(KMeansCombiner.class);
        conf.setReducerClass(KMeansReducer.class);
        conf.setNumReduceTasks(numReduceTasks);

        conf.set(CLUSTER_PATH_KEY, clustersIn);
        conf.set(DISTANCE_MEASURE_KEY, measureClass);
        conf.set(CLUSTER_CONVERGENCE_KEY, convergenceDelta);
        conf.setInt(Cluster.ITERATION_NUMBER, iteration);

        try {
            JobClient.runJob(conf);
            FileSystem fs = FileSystem.get(outPath.toUri(), conf);
            return isConverged(clustersOut, conf, fs);
        } catch (IOException e) {
            // Existing best-effort behaviour, kept as-is: a failed iteration stops the loop.
            // NOTE(review): the caller still advances its clusters path to this iteration's
            // output and runs the final clustering job against it.
            log.warn(e.toString(), e);
            return true;
        }
    }

    /**
     * Configures and runs the final, map-only (zero reducers) clustering job.
     *
     * <p>An {@link IOException} from the job is logged as a warning and not rethrown.
     *
     * @param input path of the input vectors
     * @param clustersIn path of the clusters to assign against
     * @param output output path of the job
     * @param measureClass class name of the distance measure
     * @param convergenceDelta convergence threshold as a string
     * @param vectorClass map output value class
     */
    private static void runClustering(String input, String clustersIn, String output, String measureClass,
                                      String convergenceDelta, Class<? extends Vector> vectorClass) {
        if (log.isInfoEnabled()) {
            log.info("Running Clustering");
            log.info("Input: " + input + " Clusters In: " + clustersIn + " Out: " + output + " Distance: " + measureClass);
            log.info("convergence: " + convergenceDelta + " Input Vectors: " + vectorClass.getName());
        }

        JobConf conf = new JobConf(KMeansDriver.class);
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(vectorClass);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(input));
        FileOutputFormat.setOutputPath(conf, new Path(output));

        conf.setMapperClass(KMeansClusterMapper.class);
        conf.setNumReduceTasks(0);

        conf.set(CLUSTER_PATH_KEY, clustersIn);
        conf.set(DISTANCE_MEASURE_KEY, measureClass);
        conf.set(CLUSTER_CONVERGENCE_KEY, convergenceDelta);

        try {
            JobClient.runJob(conf);
        } catch (IOException e) {
            log.warn(e.toString(), e);
        }
    }

    /**
     * Reports whether every cluster stored under {@code filePath} is converged.
     *
     * <p>Only entries whose name starts with {@code "part"} and does not end with {@code ".crc"}
     * are read. Reading stops at the first cluster that is not converged.
     *
     * @param filePath directory holding the output of one iteration
     * @param conf job configuration used to open the sequence files
     * @param fs file system that {@code filePath} lives on
     * @return {@code false} if any cluster read is not converged, {@code true} otherwise
     *         (including when there is nothing to read)
     * @throws IOException if listing, reading or closing a file fails
     * @throws IllegalStateException if the key class of a sequence file cannot be instantiated
     */
    private static boolean isConverged(String filePath, JobConf conf, FileSystem fs) throws IOException {
        for (FileStatus part : fs.listStatus(new Path(filePath))) {
            String fileName = part.getPath().getName();
            if (!fileName.startsWith("part") || fileName.endsWith(".crc")) {
                continue;
            }
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, part.getPath(), conf);
            try {
                Writable key = (Writable) reader.getKeyClass().newInstance();
                Cluster value = new Cluster();
                while (reader.next(key, value)) {
                    if (!value.isConverged()) {
                        return false;
                    }
                }
            } catch (IllegalAccessException e) {
                log.error("Exception", e);
                throw new IllegalStateException(e);
            } catch (InstantiationException e) {
                log.error("Exception", e);
                throw new IllegalStateException(e);
            } finally {
                // Always release the reader, including on the early "not converged" return.
                reader.close();
            }
        }
        return true;
    }
}
