package org.apache.mahout.clustering.fuzzykmeans;

import com.ibm.wsdl.Constants;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/mahout-core-0.3.jar:org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.class */
/**
 * Command-line driver for the fuzzy k-means map/reduce jobs.
 *
 * <p>{@link #main(String[])} parses the command line and then either runs the full iterate-then-cluster
 * pipeline via {@link #runJob} or, when the {@code clustering} flag is given, only the final
 * point-assignment job.
 */
public final class FuzzyKMeansDriver {
    private static final Logger log = LoggerFactory.getLogger(FuzzyKMeansDriver.class);

    /** Convergence threshold used when {@code --convergence} is not given. */
    private static final double DEFAULT_CONVERGENCE_DELTA = 0.5d;

    /** Iteration cap used when {@code --max} is not given. */
    private static final int DEFAULT_MAX_ITERATIONS = 20;

    /** Number of map tasks used when {@code --numMap} is not given. */
    private static final int DEFAULT_NUM_MAP_TASKS = 50;

    /** Number of reduce tasks used when {@code --numReduce} is not given. */
    private static final int DEFAULT_NUM_REDUCE_TASKS = 10;

    private FuzzyKMeansDriver() {
    }

    /**
     * Parses the command line and launches the requested job(s).
     *
     * <p>On an {@link OptionException} the error is logged and the usage help is printed; no job is run.
     *
     * @param args command-line arguments; {@code input}, {@code clusters}, {@code output} and {@code m}
     *             are required
     * @throws Exception if a value cannot be parsed or one of the invoked helpers fails
     */
    public static void main(String[] args) throws Exception {
        // The builders are reused for every option, exactly as before.
        DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();
        GroupBuilder groupBuilder = new GroupBuilder();

        // "input"/"output" are written as plain literals rather than borrowed from an unrelated library.
        DefaultOption inputOpt = optionBuilder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector")
            .withShortName("i").create();
        DefaultOption clustersOpt = optionBuilder.withLongName("clusters").withRequired(true)
            .withArgument(argumentBuilder.withName("clusters").withMinimum(1).withMaximum(1).create())
            .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  If k is also specified, then a random set of vectors will be selected and written out to this path first")
            .withShortName("c").create();
        DefaultOption kOpt = optionBuilder.withLongName(RandomSeedGenerator.K).withRequired(false)
            .withArgument(argumentBuilder.withName(RandomSeedGenerator.K).withMinimum(1).withMaximum(1).create())
            .withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.")
            .withShortName(RandomSeedGenerator.K).create();
        DefaultOption outputOpt = optionBuilder.withLongName("output").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path to put the output in")
            .withShortName("o").create();
        DefaultOption measureClassOpt = optionBuilder.withLongName("distance").withRequired(false)
            .withArgument(argumentBuilder.withName("distance").withMinimum(1).withMaximum(1).create())
            .withDescription("The Distance Measure to use.  Default is SquaredEuclidean")
            .withShortName("dm").create();
        DefaultOption convergenceDeltaOpt = optionBuilder.withLongName("convergence").withRequired(false)
            .withArgument(argumentBuilder.withName("convergence").withMinimum(1).withMaximum(1).create())
            .withDescription("The threshold below which the clusters are considered to be converged.  Default is 0.5")
            .withShortName("d").create();
        DefaultOption maxIterationsOpt = optionBuilder.withLongName("max").withRequired(false)
            .withArgument(argumentBuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription("The maximum number of iterations to perform.  Default is 20")
            .withShortName("x").create();
        // Accepted for command-line compatibility; its value is not read anywhere in this driver.
        DefaultOption vectorClassOpt = optionBuilder.withLongName("vectorClass").withRequired(false)
            .withArgument(argumentBuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create())
            .withDescription("The Vector implementation class name.  Default is RandomAccessSparseVector.class")
            .withShortName("v").create();
        DefaultOption helpOpt = optionBuilder.withLongName("help")
            .withDescription("Print out help")
            .withShortName("h").create();
        DefaultOption overwriteOpt = optionBuilder.withLongName("overwrite").withRequired(false)
            .withDescription("If set, overwrite the output directory")
            .withShortName("w").create();
        DefaultOption clusteringOpt = optionBuilder.withLongName("clustering").withRequired(false)
            .withDescription("If true, run clustering only (assumes the iterations have already taken place")
            .withShortName("l").create();
        DefaultOption mOpt = optionBuilder.withLongName("m").withRequired(true)
            .withArgument(argumentBuilder.withName("m").withMinimum(1).withMaximum(1).create())
            .withDescription("coefficient normalization factor, must be greater than 1")
            .withShortName("m").create();
        DefaultOption numReduceTasksOpt = optionBuilder.withLongName("numReduce").withRequired(false)
            .withArgument(argumentBuilder.withName("numReduce").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of reduce tasks")
            .withShortName("r").create();
        DefaultOption numMapTasksOpt = optionBuilder.withLongName("numMap").withRequired(false)
            .withArgument(argumentBuilder.withName("numMap").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of map tasks")
            .withShortName("u").create();

        // Every option queried below must be part of the group, otherwise the parser rejects it and
        // hasOption() can never be true. clustering/numReduce/numMap used to be missing here.
        Group group = groupBuilder.withName("Options")
            .withOption(inputOpt)
            .withOption(clustersOpt)
            .withOption(outputOpt)
            .withOption(measureClassOpt)
            .withOption(convergenceDeltaOpt)
            .withOption(maxIterationsOpt)
            .withOption(kOpt)
            .withOption(mOpt)
            .withOption(vectorClassOpt)
            .withOption(overwriteOpt)
            .withOption(clusteringOpt)
            .withOption(numReduceTasksOpt)
            .withOption(numMapTasksOpt)
            .withOption(helpOpt)
            .create();

        try {
            Parser parser = new Parser();
            parser.setGroup(group);
            CommandLine cmdLine = parser.parse(args);
            if (cmdLine.hasOption(helpOpt)) {
                CommandLineUtil.printHelp(group);
                return;
            }

            String input = cmdLine.getValue(inputOpt).toString();
            String clusters = cmdLine.getValue(clustersOpt).toString();
            String output = cmdLine.getValue(outputOpt).toString();

            String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
            if (cmdLine.hasOption(measureClassOpt)) {
                measureClass = cmdLine.getValue(measureClassOpt).toString();
            }
            double convergenceDelta = DEFAULT_CONVERGENCE_DELTA;
            if (cmdLine.hasOption(convergenceDeltaOpt)) {
                convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
            }
            float m = Float.parseFloat(cmdLine.getValue(mOpt).toString());
            int numReduceTasks = DEFAULT_NUM_REDUCE_TASKS;
            if (cmdLine.hasOption(numReduceTasksOpt)) {
                numReduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
            }
            int numMapTasks = DEFAULT_NUM_MAP_TASKS;
            if (cmdLine.hasOption(numMapTasksOpt)) {
                numMapTasks = Integer.parseInt(cmdLine.getValue(numMapTasksOpt).toString());
            }
            int maxIterations = DEFAULT_MAX_ITERATIONS;
            if (cmdLine.hasOption(maxIterationsOpt)) {
                maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
            }

            if (cmdLine.hasOption(overwriteOpt)) {
                HadoopUtil.overwriteOutput(output);
            }
            if (cmdLine.hasOption(kOpt)) {
                // With k given, the seed generator's result replaces the clusters path for the jobs below.
                int k = Integer.parseInt(cmdLine.getValue(kOpt).toString());
                clusters = RandomSeedGenerator.buildRandom(input, clusters, k).toString();
            }

            if (cmdLine.hasOption(clusteringOpt)) {
                runClustering(input, clusters, output, measureClass, convergenceDelta, numMapTasks, m);
            } else {
                runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numMapTasks,
                    numReduceTasks, m);
            }
        } catch (OptionException e) {
            log.error("Exception", e);
            CommandLineUtil.printHelp(group);
        }
    }

    /**
     * Runs iteration jobs until one reports convergence or {@code maxIterations} is reached, then runs
     * the clustering job.
     *
     * <p>Iteration {@code n} writes to {@code output/clusters-n}, which becomes the cluster input of the
     * next step. The clustering job writes to {@code output/points}.
     *
     * @param input            path of the input vectors
     * @param clustersIn       path of the initial clusters
     * @param output           base output directory
     * @param measureClass     class name of the distance measure
     * @param convergenceDelta convergence threshold passed to the jobs
     * @param maxIterations    maximum number of iteration jobs to run
     * @param numMapTasks      number of map tasks for each job
     * @param numReduceTasks   number of reduce tasks for each iteration job
     * @param m                fuzziness coefficient passed to the jobs
     */
    public static void runJob(String input, String clustersIn, String output, String measureClass,
                              double convergenceDelta, int maxIterations, int numMapTasks, int numReduceTasks,
                              float m) {
        boolean converged = false;
        for (int iteration = 0; !converged && iteration < maxIterations; iteration++) {
            log.info("Iteration {}", iteration);
            String clustersOut = output + File.separator + "clusters-" + iteration;
            converged = runIteration(input, clustersIn, clustersOut, measureClass, convergenceDelta, numMapTasks,
                numReduceTasks, iteration, m);
            // NOTE(review): a failed iteration also reports "converged", so the clustering step below
            // then reads from that iteration's output directory.
            clustersIn = clustersOut;
        }
        log.info("Clustering ");
        runClustering(input, clustersIn, output + File.separator + "points", measureClass, convergenceDelta,
            numMapTasks, m);
    }

    /**
     * Runs a single iteration job and checks its output for convergence.
     *
     * @param input            path of the input vectors
     * @param clustersIn       path of the clusters produced by the previous step
     * @param clustersOut      output path for this iteration's clusters
     * @param measureClass     class name of the distance measure
     * @param convergenceDelta convergence threshold
     * @param numMapTasks      number of map tasks
     * @param numReduceTasks   number of reduce tasks
     * @param iterationNumber  iteration index, used only in the job name
     * @param m                fuzziness coefficient
     * @return {@code true} if every cluster written by the job is converged, or if the job or the
     *         convergence check failed with an {@link IOException} (which is logged and ends iteration)
     */
    private static boolean runIteration(String input, String clustersIn, String clustersOut, String measureClass,
                                        double convergenceDelta, int numMapTasks, int numReduceTasks,
                                        int iterationNumber, float m) {
        JobConf conf = new JobConf(FuzzyKMeansJob.class);
        conf.setJobName("Fuzzy K Means{" + iterationNumber + '}');

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(FuzzyKMeansInfo.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(SoftCluster.class);

        FileInputFormat.setInputPaths(conf, new Path(input));
        Path outPath = new Path(clustersOut);
        FileOutputFormat.setOutputPath(conf, outPath);

        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setMapperClass(FuzzyKMeansMapper.class);
        conf.setCombinerClass(FuzzyKMeansCombiner.class);
        conf.setReducerClass(FuzzyKMeansReducer.class);
        conf.setNumMapTasks(numMapTasks);
        conf.setNumReduceTasks(numReduceTasks);

        setClusteringParameters(conf, clustersIn, measureClass, convergenceDelta, m);

        try {
            JobClient.runJob(conf);
            return isConverged(clustersOut, conf, FileSystem.get(outPath.toUri(), conf));
        } catch (IOException e) {
            log.warn(e.toString(), e);
            return true;
        }
    }

    /**
     * Runs the map-only job that produces the final output for the input points.
     *
     * <p>An {@link IOException} from the job is logged and not rethrown.
     *
     * @param input            path of the input vectors
     * @param clustersIn       path of the clusters to use
     * @param output           output path of the job
     * @param measureClass     class name of the distance measure
     * @param convergenceDelta convergence threshold
     * @param numMapTasks      number of map tasks
     * @param m                fuzziness coefficient
     */
    private static void runClustering(String input, String clustersIn, String output, String measureClass,
                                      double convergenceDelta, int numMapTasks, float m) {
        JobConf conf = new JobConf(FuzzyKMeansDriver.class);
        conf.setJobName("Fuzzy K Means Clustering");

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(VectorWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(FuzzyKMeansOutput.class);

        FileInputFormat.setInputPaths(conf, new Path(input));
        FileOutputFormat.setOutputPath(conf, new Path(output));

        conf.setMapperClass(FuzzyKMeansClusterMapper.class);
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setNumMapTasks(numMapTasks);
        conf.setNumReduceTasks(0);

        setClusteringParameters(conf, clustersIn, measureClass, convergenceDelta, m);

        try {
            JobClient.runJob(conf);
        } catch (IOException e) {
            log.warn(e.toString(), e);
        }
    }

    /** Stores the settings shared by the iteration and clustering jobs in the job configuration. */
    private static void setClusteringParameters(JobConf conf, String clustersIn, String measureClass,
                                                double convergenceDelta, float m) {
        conf.set("org.apache.mahout.clustering.kmeans.path", clustersIn);
        conf.set("org.apache.mahout.clustering.kmeans.measure", measureClass);
        conf.set("org.apache.mahout.clustering.kmeans.convergence", String.valueOf(convergenceDelta));
        conf.set(FuzzyKMeansConfigKeys.M_KEY, String.valueOf(m));
    }

    /**
     * Reads the clusters written under {@code filePath} and reports whether all of them are converged.
     *
     * <p>Only files whose name starts with {@code "part"} are read. Reading stops at the first cluster
     * that is not converged.
     *
     * @param filePath directory containing the cluster output of one iteration
     * @param conf     configuration used to open the sequence files
     * @param fs       file system holding {@code filePath}
     * @return {@code true} if no non-converged cluster was found (including when no part file matched)
     * @throws IOException if listing, reading or closing a file fails
     */
    private static boolean isConverged(String filePath, Configuration conf, FileSystem fs) throws IOException {
        PathFilter partFilter = new PathFilter() {
            @Override
            public boolean accept(Path candidate) {
                return candidate.getName().startsWith("part");
            }
        };
        Path clusterGlob = new Path(filePath + "/*");
        FileStatus[] partFiles =
            fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterGlob, partFilter)), partFilter);

        for (FileStatus partFile : partFiles) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, fs.makeQualified(partFile.getPath()), conf);
            try {
                Text key = new Text();
                SoftCluster cluster = new SoftCluster();
                while (reader.next(key, cluster)) {
                    if (!cluster.isConverged()) {
                        // One non-converged cluster decides the result; skip the remaining records/files.
                        return false;
                    }
                }
            } finally {
                reader.close();
            }
        }
        return true;
    }
}
