package org.apache.mahout.math.hadoop.similarity;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import org.apache.hadoop.util.ToolRunner;
import org.apache.lucene.index.IndexFileNames;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.apache.mahout.math.hadoop.similarity.SimilarityMatrixEntryKey;
import org.apache.mahout.math.hadoop.similarity.vector.DistributedVectorSimilarity;

/* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.class */
public class RowSimilarityJob extends AbstractJob {
    /**
     * Configuration key holding the class name of the {@link DistributedVectorSimilarity}
     * implementation; set by {@code run} and read in the {@code setup} methods of
     * {@link RowWeightMapper} and {@link SimilarityReducer}.
     */
    public static final String DISTRIBUTED_SIMILARITY_CLASSNAME = RowSimilarityJob.class.getName() + ".distributedSimilarityClassname";
    /**
     * Configuration key holding the number of columns of the input matrix; read by
     * {@link SimilarityReducer}, which rejects values smaller than 1.
     */
    public static final String NUMBER_OF_COLUMNS = RowSimilarityJob.class.getName() + ".numberOfColumns";
    /**
     * Configuration key holding the maximum number of similarities kept per row; read by
     * {@link EntriesToVectorsReducer}, which rejects values smaller than 1.
     */
    public static final String MAX_SIMILARITIES_PER_ROW = RowSimilarityJob.class.getName() + ".maxSimilaritiesPerRow";
    /** Default cap on similarities per row; equals the default of the maxSimilaritiesPerRow command-line option. */
    private static final int DEFAULT_MAX_SIMILARITIES_PER_ROW = 100;

    /* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/RowSimilarityJob$CooccurrencesMapper.class */
    public static class CooccurrencesMapper extends Mapper<VarIntWritable, WeightedOccurrenceArray, WeightedRowPair, Cooccurrence> {
        protected void map(VarIntWritable varIntWritable, WeightedOccurrenceArray weightedOccurrenceArray, Mapper<VarIntWritable, WeightedOccurrenceArray, WeightedRowPair, Cooccurrence>.Context context) throws IOException, InterruptedException {
            WeightedOccurrence[] weightedOccurrences = weightedOccurrenceArray.getWeightedOccurrences();
            WeightedRowPair weightedRowPair = new WeightedRowPair();
            Cooccurrence cooccurrence = new Cooccurrence();
            int i = 0;
            for (int i2 = 0; i2 < weightedOccurrences.length; i2++) {
                int row = weightedOccurrences[i2].getRow();
                double weight = weightedOccurrences[i2].getWeight();
                double value = weightedOccurrences[i2].getValue();
                for (int i3 = i2; i3 < weightedOccurrences.length; i3++) {
                    int row2 = weightedOccurrences[i3].getRow();
                    double weight2 = weightedOccurrences[i3].getWeight();
                    double value2 = weightedOccurrences[i3].getValue();
                    if (row <= row2) {
                        weightedRowPair.set(row, row2, weight, weight2);
                    } else {
                        weightedRowPair.set(row2, row, weight2, weight);
                    }
                    cooccurrence.set(varIntWritable.get(), value, value2);
                    context.write(weightedRowPair, cooccurrence);
                    i++;
                }
            }
            context.getCounter(Counter.COOCCURRENCES).increment(i);
        }

        protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((VarIntWritable) obj, (WeightedOccurrenceArray) obj2, (Mapper<VarIntWritable, WeightedOccurrenceArray, WeightedRowPair, Cooccurrence>.Context) context);
        }
    }

    /* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/RowSimilarityJob$Counter.class */
    /** Hadoop counters maintained by the phases of this job. */
    public enum Counter {
        /** Number of cooccurrence records written by {@link CooccurrencesMapper}. */
        COOCCURRENCES,
        /** Number of row pairs for which {@link SimilarityReducer} computed a non-NaN similarity. */
        SIMILAR_ROWS
    }

    /* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/RowSimilarityJob$EntriesToVectorsReducer.class */
    public static class EntriesToVectorsReducer extends Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable, IntWritable, VectorWritable> {
        private int maxSimilaritiesPerRow;

        protected void setup(Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable, IntWritable, VectorWritable>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            this.maxSimilaritiesPerRow = context.getConfiguration().getInt(RowSimilarityJob.MAX_SIMILARITIES_PER_ROW, -1);
            if (this.maxSimilaritiesPerRow < 1) {
                throw new IllegalStateException("Maximum number of similarities per row was not correctly set!");
            }
        }

        protected void reduce(SimilarityMatrixEntryKey similarityMatrixEntryKey, Iterable<DistributedRowMatrix.MatrixEntryWritable> iterable, Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable, IntWritable, VectorWritable>.Context context) throws IOException, InterruptedException {
            RandomAccessSparseVector randomAccessSparseVector = new RandomAccessSparseVector(Integer.MAX_VALUE, this.maxSimilaritiesPerRow);
            int i = 0;
            for (DistributedRowMatrix.MatrixEntryWritable matrixEntryWritable : iterable) {
                randomAccessSparseVector.setQuick(matrixEntryWritable.getCol(), matrixEntryWritable.getVal());
                i++;
                if (i == this.maxSimilaritiesPerRow) {
                    break;
                }
            }
            context.write(new IntWritable(similarityMatrixEntryKey.getRow()), new VectorWritable(new SequentialAccessSparseVector(randomAccessSparseVector)));
        }

        protected /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((SimilarityMatrixEntryKey) obj, (Iterable<DistributedRowMatrix.MatrixEntryWritable>) iterable, (Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable, IntWritable, VectorWritable>.Context) context);
        }
    }

    /* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/RowSimilarityJob$RowWeightMapper.class */
    public static class RowWeightMapper extends Mapper<IntWritable, VectorWritable, VarIntWritable, WeightedOccurrence> {
        private DistributedVectorSimilarity similarity;

        protected void setup(Mapper<IntWritable, VectorWritable, VarIntWritable, WeightedOccurrence>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            this.similarity = RowSimilarityJob.instantiateSimilarity(context.getConfiguration().get(RowSimilarityJob.DISTRIBUTED_SIMILARITY_CLASSNAME));
        }

        protected void map(IntWritable intWritable, VectorWritable vectorWritable, Mapper<IntWritable, VectorWritable, VarIntWritable, WeightedOccurrence>.Context context) throws IOException, InterruptedException {
            Vector vector = vectorWritable.get();
            double weight = this.similarity.weight(vector);
            Iterator<Vector.Element> iterateNonZero = vector.iterateNonZero();
            while (iterateNonZero.hasNext()) {
                Vector.Element next = iterateNonZero.next();
                context.write(new VarIntWritable(next.index()), new WeightedOccurrence(intWritable.get(), next.get(), weight));
            }
        }

        protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((IntWritable) obj, (VectorWritable) obj2, (Mapper<IntWritable, VectorWritable, VarIntWritable, WeightedOccurrence>.Context) context);
        }
    }

    /* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/RowSimilarityJob$SimilarityReducer.class */
    public static class SimilarityReducer extends Reducer<WeightedRowPair, Cooccurrence, SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable> {
        private DistributedVectorSimilarity similarity;
        private int numberOfColumns;

        protected void setup(Reducer<WeightedRowPair, Cooccurrence, SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            this.similarity = RowSimilarityJob.instantiateSimilarity(context.getConfiguration().get(RowSimilarityJob.DISTRIBUTED_SIMILARITY_CLASSNAME));
            this.numberOfColumns = context.getConfiguration().getInt(RowSimilarityJob.NUMBER_OF_COLUMNS, -1);
            if (this.numberOfColumns < 1) {
                throw new IllegalStateException("Number of columns was not correctly set!");
            }
        }

        protected void reduce(WeightedRowPair weightedRowPair, Iterable<Cooccurrence> iterable, Reducer<WeightedRowPair, Cooccurrence, SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable>.Context context) throws IOException, InterruptedException {
            int rowA = weightedRowPair.getRowA();
            int rowB = weightedRowPair.getRowB();
            double similarity = this.similarity.similarity(rowA, rowB, iterable, weightedRowPair.getWeightA(), weightedRowPair.getWeightB(), this.numberOfColumns);
            if (Double.isNaN(similarity)) {
                return;
            }
            context.getCounter(Counter.SIMILAR_ROWS).increment(1L);
            SimilarityMatrixEntryKey similarityMatrixEntryKey = new SimilarityMatrixEntryKey();
            DistributedRowMatrix.MatrixEntryWritable matrixEntryWritable = new DistributedRowMatrix.MatrixEntryWritable();
            matrixEntryWritable.setVal(similarity);
            matrixEntryWritable.setRow(rowA);
            matrixEntryWritable.setCol(rowB);
            similarityMatrixEntryKey.set(rowA, similarity);
            context.write(similarityMatrixEntryKey, matrixEntryWritable);
            if (rowA != rowB) {
                matrixEntryWritable.setRow(rowB);
                matrixEntryWritable.setCol(rowA);
                similarityMatrixEntryKey.set(rowB, similarity);
                context.write(similarityMatrixEntryKey, matrixEntryWritable);
            }
        }

        protected /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((WeightedRowPair) obj, (Iterable<Cooccurrence>) iterable, (Reducer<WeightedRowPair, Cooccurrence, SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable>.Context) context);
        }
    }

    /* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/RowSimilarityJob$WeightedOccurrencesPerColumnReducer.class */
    public static class WeightedOccurrencesPerColumnReducer extends Reducer<VarIntWritable, WeightedOccurrence, VarIntWritable, WeightedOccurrenceArray> {
        protected void reduce(VarIntWritable varIntWritable, Iterable<WeightedOccurrence> iterable, Reducer<VarIntWritable, WeightedOccurrence, VarIntWritable, WeightedOccurrenceArray>.Context context) throws IOException, InterruptedException {
            ArrayList arrayList = new ArrayList();
            Iterator<WeightedOccurrence> it = iterable.iterator();
            while (it.hasNext()) {
                arrayList.add(it.next().m739clone());
            }
            context.write(varIntWritable, new WeightedOccurrenceArray((WeightedOccurrence[]) arrayList.toArray(new WeightedOccurrence[arrayList.size()])));
        }

        protected /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((VarIntWritable) obj, (Iterable<WeightedOccurrence>) iterable, (Reducer<VarIntWritable, WeightedOccurrence, VarIntWritable, WeightedOccurrenceArray>.Context) context);
        }
    }

    /**
     * Command-line entry point: hands the arguments to a new {@link RowSimilarityJob} via
     * {@link ToolRunner}. The value returned by {@code ToolRunner.run} is not used.
     *
     * @param strArr command-line arguments
     * @throws Exception whatever {@code ToolRunner.run} throws
     */
    public static void main(String[] strArr) throws Exception {
        RowSimilarityJob job = new RowSimilarityJob();
        ToolRunner.run(job, strArr);
    }

    /**
     * Parses the command line and runs up to three MapReduce phases:
     * <ol>
     *   <li>{@link RowWeightMapper} / {@link WeightedOccurrencesPerColumnReducer}: input to {@code <tempDir>/weights}</li>
     *   <li>{@link CooccurrencesMapper} / {@link SimilarityReducer}: {@code weights} to {@code <tempDir>/pairwiseSimilarity}</li>
     *   <li>identity mapper / {@link EntriesToVectorsReducer}: {@code pairwiseSimilarity} to the output path</li>
     * </ol>
     * Each phase is executed only if {@code shouldRunNextPhase} allows it.
     *
     * @param strArr command-line arguments
     * @return 0 on success (or when the last phase is skipped); -1 if the arguments could not be
     *         parsed or if any executed phase reports failure
     */
    @Override
    public int run(String[] strArr) throws IOException, ClassNotFoundException, InterruptedException {
        addInputOption();
        addOutputOption();
        addOption("numberOfColumns", "r", "Number of columns in the input matrix");
        // NOTE(review): the short option names below come from unrelated constants (Lucene's
        // IndexFileNames and FuzzyKMeansDriver) - this looks like a decompiler constant-matching
        // artifact; confirm the literal values and inline them.
        addOption("similarityClassname", IndexFileNames.SEPARATE_NORMS_EXTENSION, "Name of distributed similarity class to instantiate, alternatively use one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
        addOption("maxSimilaritiesPerRow", FuzzyKMeansDriver.M_OPTION, "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));

        Map<String, String> parsedArgs = parseArguments(strArr);
        if (parsedArgs == null) {
            return -1;
        }

        int numberOfColumns = Integer.parseInt(parsedArgs.get("--numberOfColumns"));
        String similarityClassnameArg = parsedArgs.get("--similarityClassname");
        String distributedSimilarityClassname;
        try {
            // Accept the name of a predefined similarity type ...
            distributedSimilarityClassname = SimilarityType.valueOf(similarityClassnameArg).getSimilarityImplementationClassName();
        } catch (IllegalArgumentException e) {
            // ... and otherwise treat the argument as a class name.
            distributedSimilarityClassname = similarityClassnameArg;
        }
        int maxSimilaritiesPerRow = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerRow"));

        Path inputPath = getInputPath();
        Path outputPath = getOutputPath();
        Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
        Path weightsPath = new Path(tempDirPath, "weights");
        Path pairwiseSimilarityPath = new Path(tempDirPath, "pairwiseSimilarity");

        AtomicInteger currentPhase = new AtomicInteger();

        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job weights = prepareJob(inputPath, weightsPath, SequenceFileInputFormat.class, RowWeightMapper.class, VarIntWritable.class, WeightedOccurrence.class, WeightedOccurrencesPerColumnReducer.class, VarIntWritable.class, WeightedOccurrenceArray.class, SequenceFileOutputFormat.class);
            weights.getConfiguration().set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
            // Do not continue with later phases on top of the output of a failed job.
            if (!weights.waitForCompletion(true)) {
                return -1;
            }
        }

        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, SequenceFileInputFormat.class, CooccurrencesMapper.class, WeightedRowPair.class, Cooccurrence.class, SimilarityReducer.class, SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class, SequenceFileOutputFormat.class);
            Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
            pairwiseConf.set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
            pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
            if (!pairwiseSimilarity.waitForCompletion(true)) {
                return -1;
            }
        }

        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job asMatrix = prepareJob(pairwiseSimilarityPath, outputPath, SequenceFileInputFormat.class, Mapper.class, SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class, EntriesToVectorsReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
            asMatrix.setPartitionerClass(HashPartitioner.class);
            asMatrix.setGroupingComparatorClass(SimilarityMatrixEntryKey.SimilarityMatrixEntryKeyGroupingComparator.class);
            asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
            if (!asMatrix.waitForCompletion(true)) {
                return -1;
            }
        }

        return 0;
    }

    /**
     * Loads the named class, checks that it is a {@link DistributedVectorSimilarity} and creates
     * an instance through its no-argument constructor.
     *
     * @param str fully qualified class name of the similarity implementation
     * @return a new instance of the named class
     * @throws IllegalStateException if {@code str} is {@code null}, or if the class cannot be
     *         found, accessed or instantiated (the original exception is kept as the cause)
     * @throws ClassCastException if the named class is not a {@link DistributedVectorSimilarity}
     */
    static DistributedVectorSimilarity instantiateSimilarity(String str) {
        // A missing configuration entry arrives here as null; report it in the same way the
        // other configuration checks in this job do instead of failing inside Class.forName.
        if (str == null) {
            throw new IllegalStateException("Distributed similarity class name was not correctly set!");
        }
        try {
            return Class.forName(str).asSubclass(DistributedVectorSimilarity.class).newInstance();
        } catch (ClassNotFoundException e) {
            throw new IllegalStateException("Unable to find similarity class " + str, e);
        } catch (IllegalAccessException e) {
            throw new IllegalStateException("Unable to access similarity class " + str, e);
        } catch (InstantiationException e) {
            throw new IllegalStateException("Unable to instantiate similarity class " + str, e);
        }
    }
}
