package org.apache.mahout.clustering.spectral.kmeans;

import com.ibm.icu.text.DateFormat;
import com.sun.jersey.core.header.QualityFactor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.server.datanode.DataStorage;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.spectral.AffinityMatrixInputJob;
import org.apache.mahout.clustering.spectral.MatrixDiagonalizeJob;
import org.apache.mahout.clustering.spectral.UnitVectorizerJob;
import org.apache.mahout.clustering.spectral.VectorMatrixMultiplicationJob;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.apache.mahout.math.hadoop.stochasticsvd.SSVDSolver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.class */
/**
 * Command-line driver that chains the spectral k-means jobs.
 *
 * <p>The static {@code run} overloads execute, in order: {@link AffinityMatrixInputJob},
 * {@link MatrixDiagonalizeJob}, {@link VectorMatrixMultiplicationJob}, {@link SSVDSolver},
 * {@link UnitVectorizerJob}, {@link EigenSeedGenerator} and {@link KMeansDriver}, then log
 * the resulting cluster assignment of every point.
 */
public class SpectralKMeansDriver extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(SpectralKMeansDriver.class);

    /** Default value of the {@code reduceTasks} option. */
    public static final int REDUCERS = 10;
    /** Default value of the {@code outerProdBlockHeight} option. */
    public static final int BLOCKHEIGHT = 30000;
    /** Default value of the {@code oversampling} option. */
    public static final int OVERSAMPLING = 15;
    /** Default value of the {@code powerIter} option. */
    public static final int POWERITERS = 0;

    /**
     * Launches the driver through {@link ToolRunner}.
     *
     * @param args command-line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new SpectralKMeansDriver(), args);
    }

    /**
     * Declares the command-line options, parses {@code args} and starts the pipeline.
     *
     * @param args command-line arguments
     * @return always 0, including when argument parsing yields nothing to run
     * @throws Exception if an option value cannot be parsed or a job fails
     */
    @Override // org.apache.hadoop.util.Tool
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        addInputOption();
        addOutputOption();
        // Short option names are plain literals; they have no relation to any other library's constants.
        addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
        addOption(DefaultOptionCreator.CLUSTERS_IN_OPTION, "k", "Number of clusters and top eigenvectors", true);
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(DefaultOptionCreator.convergenceOption().create());
        addOption(DefaultOptionCreator.maxIterationsOption().create());
        addOption(DefaultOptionCreator.overwriteOption().create());
        // NOTE(review): this flag is registered but never read in this class; the pipeline below
        // always uses SSVDSolver, so the help text about Lanczos looks stale - confirm.
        addFlag("usessvd", "ssvd", "Uses SSVD as the eigensolver. Default is the Lanczos solver.");
        addOption("reduceTasks", "t", "Number of reducers for SSVD", String.valueOf(REDUCERS));
        addOption("outerProdBlockHeight", "oh", "Block height of outer products for SSVD", String.valueOf(BLOCKHEIGHT));
        addOption("oversampling", "p", "Oversampling parameter for SSVD", String.valueOf(OVERSAMPLING));
        addOption("powerIter", "q", "Additional power iterations for SSVD", String.valueOf(POWERITERS));
        if (parseArguments(args) == null) {
            return 0;
        }

        Path input = getInputPath();
        Path output = getOutputPath();
        if (hasOption("overwrite")) {
            HadoopUtil.delete(conf, getTempPath());
            HadoopUtil.delete(conf, getOutputPath());
        }

        // Options are parsed in the same order as the parameters they feed.
        int numDims = Integer.parseInt(getOption("dimensions"));
        int clusters = Integer.parseInt(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
        DistanceMeasure measure = (DistanceMeasure) ClassUtils.instantiateAs(
                getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION), DistanceMeasure.class);
        double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
        int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
        Path tempDir = new Path(getOption("tempDir"));
        int numReducers = Integer.parseInt(getOption("reduceTasks"));
        int blockHeight = Integer.parseInt(getOption("outerProdBlockHeight"));
        int oversampling = Integer.parseInt(getOption("oversampling"));
        int powerIters = Integer.parseInt(getOption("powerIter"));

        run(conf, input, output, numDims, clusters, measure, convergenceDelta, maxIterations, tempDir,
                numReducers, blockHeight, oversampling, powerIters);
        return 0;
    }

    /**
     * Runs the pipeline with the default SSVD settings ({@link #REDUCERS}, {@link #BLOCKHEIGHT},
     * {@link #OVERSAMPLING}, {@link #POWERITERS}).
     *
     * @param conf             Hadoop configuration
     * @param input            path handed to {@link AffinityMatrixInputJob}
     * @param output           directory receiving the initial clusters and {@code kmeans_out}
     * @param numDims          value of the {@code dimensions} option
     * @param clusters         value of the clusters option; also passed to SSVD as its rank
     * @param measure          distance measure passed to {@link EigenSeedGenerator}
     * @param convergenceDelta convergence threshold passed to {@link KMeansDriver}
     * @param maxIterations    iteration cap passed to {@link KMeansDriver}
     * @param tempDir          working directory; deleted before the jobs start
     * @throws IOException            on file-system failure
     * @throws InterruptedException   if a job is interrupted
     * @throws ClassNotFoundException if a job class cannot be loaded
     */
    public static void run(Configuration conf, Path input, Path output, int numDims, int clusters,
            DistanceMeasure measure, double convergenceDelta, int maxIterations, Path tempDir)
            throws IOException, InterruptedException, ClassNotFoundException {
        run(conf, input, output, numDims, clusters, measure, convergenceDelta, maxIterations, tempDir,
                REDUCERS, BLOCKHEIGHT, OVERSAMPLING, POWERITERS);
    }

    /**
     * Runs the full pipeline and logs one {@code "<point>: <cluster id>"} line per clustered point.
     *
     * @param conf             Hadoop configuration
     * @param input            path handed to {@link AffinityMatrixInputJob}
     * @param output           directory receiving the initial clusters and {@code kmeans_out}
     * @param numDims          value of the {@code dimensions} option
     * @param clusters         value of the clusters option; also passed to SSVD as its rank
     * @param measure          distance measure passed to {@link EigenSeedGenerator}
     * @param convergenceDelta convergence threshold passed to {@link KMeansDriver}
     * @param maxIterations    iteration cap passed to {@link KMeansDriver}
     * @param tempDir          working directory; deleted before the jobs start
     * @param numReducers      value of the {@code reduceTasks} option, passed to SSVD
     * @param blockHeight      value of the {@code outerProdBlockHeight} option, passed to SSVD
     * @param oversampling     value of the {@code oversampling} option, passed to SSVD
     * @param powerIters       value of the {@code powerIter} option, passed to {@code SSVDSolver.setQ}
     * @throws IOException            on file-system failure
     * @throws InterruptedException   if a job is interrupted
     * @throws ClassNotFoundException if a job class cannot be loaded
     */
    public static void run(Configuration conf, Path input, Path output, int numDims, int clusters,
            DistanceMeasure measure, double convergenceDelta, int maxIterations, Path tempDir,
            int numReducers, int blockHeight, int oversampling, int powerIters)
            throws IOException, InterruptedException, ClassNotFoundException {
        HadoopUtil.delete(conf, tempDir);
        Path outputCalc = new Path(tempDir, "calculations");
        Path outputTmp = new Path(tempDir, "temporary");

        Path affSeqFiles = new Path(outputCalc, "seqfile");
        AffinityMatrixInputJob.runJob(input, affSeqFiles, numDims, numDims);

        // The jobs below run against a copy of the caller's configuration.
        Configuration depConf = new Configuration(conf);

        // NOTE(review): this matrix is configured but not read again in this method; kept so that
        // any side effect of setConf is preserved - confirm whether it can be dropped.
        DistributedRowMatrix affinity =
                new DistributedRowMatrix(affSeqFiles, new Path(outputTmp, "afftmp"), numDims, numDims);
        affinity.setConf(depConf);

        // NOTE(review): the last argument resolves outputCalc against itself; looks unintentional - confirm.
        DistributedRowMatrix laplacian = VectorMatrixMultiplicationJob.runJob(
                affSeqFiles,
                MatrixDiagonalizeJob.runJob(affSeqFiles, numDims),
                new Path(outputCalc, "laplacian"),
                new Path(outputCalc, outputCalc));
        laplacian.setConf(depConf);

        // SSVD takes an array of input paths; only the single row path is supplied.
        SSVDSolver solver = new SSVDSolver(depConf, new Path[]{laplacian.getRowPath()},
                new Path(outputCalc, "SSVD"), blockHeight, clusters, oversampling, numReducers);
        solver.setComputeV(false);
        solver.setComputeU(true);
        solver.setOverwrite(true);
        solver.setQ(powerIters);
        solver.run();

        Path unitVectors = new Path(outputCalc, "unitvectors");
        UnitVectorizerJob.runJob(new Path(solver.getUPath()), unitVectors);

        DistributedRowMatrix normalized =
                new DistributedRowMatrix(unitVectors, new Path(unitVectors, "tmp"), clusters, numDims);
        normalized.setConf(depConf);
        Path data = normalized.getRowPath();

        Path initialClusters = EigenSeedGenerator.buildFromEigens(
                conf, data, new Path(output, Cluster.INITIAL_CLUSTERS_DIR), clusters, measure);
        Path answer = new Path(output, "kmeans_out");
        KMeansDriver.run(conf, data, initialClusters, answer, convergenceDelta, maxIterations, true, 0.0d, false);

        List<String> mapping = readInputMapping(conf);
        logAssignments(conf, new Path(new Path(answer, "clusteredPoints"), "part-m-00000"), mapping);
    }

    /**
     * Reads {@code generic_input_mapping} from {@code hadoop.tmp.dir} into a list, then deletes the file.
     * Returns an empty list (after logging a warning) when the file does not exist.
     */
    private static List<String> readInputMapping(Configuration conf) throws IOException {
        Path mappingPath = new Path(new Path(conf.get("hadoop.tmp.dir")), "generic_input_mapping");
        List<String> mapping = new ArrayList<String>();
        FileSystem fs = FileSystem.get(mappingPath.toUri(), conf);
        if (!fs.exists(mappingPath)) {
            log.warn("generic input mapping file not found!");
            return mapping;
        }
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, mappingPath, conf);
        try {
            Text name = new Text();
            IntWritable index = new IntWritable();
            while (reader.next(index, name)) {
                mapping.add(name.toString());
            }
        } finally {
            // Release the stream before the file is deleted.
            reader.close();
        }
        HadoopUtil.delete(conf, mappingPath);
        return mapping;
    }

    /**
     * Logs the key (cluster id) of every record in {@code clusteredPoints}, labelled with the
     * matching {@code mapping} entry, or with the running record index when no entry exists.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static void logAssignments(Configuration conf, Path clusteredPoints, List<String> mapping) {
        // Only the IntWritable key is read, so the value type is left unspecified.
        Iterable<Pair<IntWritable, ?>> records = new SequenceFileIterable(clusteredPoints, conf);
        int id = 0;
        for (Pair<IntWritable, ?> record : records) {
            int clusterId = record.getFirst().get();
            if (id < mapping.size()) {
                log.info("{}: {}", mapping.get(id), clusterId);
            } else {
                // Mapping is absent or shorter than the output: fall back to the index instead of failing.
                log.info("{}: {}", id, clusterId);
            }
            id++;
        }
    }
}
