package org.apache.mahout.math.hadoop.similarity;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.MathHelper;
import org.apache.mahout.math.hadoop.similarity.RowSimilarityJob;
import org.apache.mahout.math.hadoop.similarity.vector.DistributedTanimotoCoefficientVectorSimilarity;
import org.apache.mahout.math.hadoop.similarity.vector.DistributedVectorSimilarity;
import org.easymock.IArgumentMatcher;
import org.easymock.classextension.EasyMock;
import org.junit.Test;

/* loaded from: input_file:org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.class */
public final class TestRowSimilarityJob extends MahoutTestCase {

    /**
     * Test-only {@link DistributedVectorSimilarity} that forwards every call to a
     * {@link DistributedTanimotoCoefficientVectorSimilarity}, except that comparing a row with
     * itself (equal row indices) yields {@code Double.NaN} instead of the delegate's value.
     */
    static class DistributedTanimotoCoefficientExcludeIdentityVectorSimilarity implements DistributedVectorSimilarity {
        /** Shared delegate that performs the actual similarity and weight computations. */
        private static final DistributedVectorSimilarity DELEGATE = new DistributedTanimotoCoefficientVectorSimilarity();

        /** Explicit no-argument constructor; the class name is handed to the job as a string in the tests. */
        DistributedTanimotoCoefficientExcludeIdentityVectorSimilarity() {
        }

        /**
         * Returns {@code Double.NaN} when {@code i == i2}; otherwise returns whatever the Tanimoto
         * delegate computes for the same arguments, which are passed through untouched.
         */
        public double similarity(int i, int i2, Iterable<Cooccurrence> iterable, double d, double d2, int i3) {
            return i == i2 ? Double.NaN : DELEGATE.similarity(i, i2, iterable, d, d2, i3);
        }

        /** Returns the weight the Tanimoto delegate assigns to {@code vector}. */
        public double weight(Vector vector) {
            return DELEGATE.weight(vector);
        }
    }

    /**
     * Runs {@code RowSimilarityJob.RowWeightMapper} on a single sparse row (row id 123 with
     * entries at columns 456 and 789) and verifies, via a mocked context, that one
     * {@code WeightedOccurrence} is written per entry, keyed by the column index.
     */
    @Test
    public void testRowWeightMapper() throws Exception {
        // Input row: two non-zero entries in an otherwise empty sparse vector.
        RandomAccessSparseVector row = new RandomAccessSparseVector(Integer.MAX_VALUE);
        row.set(456, 0.5d);
        row.set(789, 0.1d);

        // Record the writes the mapper is expected to perform.
        Mapper.Context mockContext = EasyMock.createMock(Mapper.Context.class);
        mockContext.write(new VarIntWritable(456), new WeightedOccurrence(123, 0.5d, 2.0d));
        mockContext.write(new VarIntWritable(789), new WeightedOccurrence(123, 0.1d, 2.0d));
        EasyMock.replay(mockContext);

        // The similarity is injected by field name because the mapper is not set up through a job here.
        RowSimilarityJob.RowWeightMapper mapper = new RowSimilarityJob.RowWeightMapper();
        setField(mapper, "similarity", new DistributedTanimotoCoefficientVectorSimilarity());
        mapper.map(new IntWritable(123), new VectorWritable(row), mockContext);

        EasyMock.verify(mockContext);
    }

    /**
     * Runs {@code RowSimilarityJob.WeightedOccurrencesPerColumnReducer} for column 123 with two
     * weighted occurrences and verifies, via a mocked context, that a single
     * {@code WeightedOccurrenceArray} holding exactly those occurrences is written under the
     * same column key.
     */
    @Test
    public void testWeightedOccurrencesPerColumnReducer() throws Exception {
        // Typed list (instead of a raw List) so it can be passed to the matcher and the reducer
        // without unchecked conversions.
        List<WeightedOccurrence> occurrences = Arrays.asList(new WeightedOccurrence(45, 0.5d, 1.0d), new WeightedOccurrence(78, 3.0d, 9.0d));

        Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
        // Both arguments use matchers: EasyMock requires either all or none of the arguments
        // of a recorded call to be matchers.
        context.write(EasyMock.eq(new VarIntWritable(123)), weightedOccurrenceArrayMatches(occurrences));
        EasyMock.replay(context);

        new RowSimilarityJob.WeightedOccurrencesPerColumnReducer().reduce(new VarIntWritable(123), occurrences, context);

        EasyMock.verify(context);
    }

    /**
     * Registers an EasyMock argument matcher that accepts a {@code WeightedOccurrenceArray}
     * whose number of occurrences equals the size of {@code collection} and whose every
     * occurrence is contained in {@code collection}.
     *
     * <p>Must be called inline while recording an expectation, in the position of the argument
     * to be matched. Note that membership is checked with {@code contains}, so duplicate
     * occurrences are not counted individually.
     *
     * @param collection the occurrences the matched array is expected to consist of
     * @return always {@code null}; the value is only a placeholder for the argument position,
     *         the actual matching is done by the matcher reported to EasyMock
     */
    static WeightedOccurrenceArray weightedOccurrenceArrayMatches(final Collection<WeightedOccurrence> collection) {
        EasyMock.reportMatcher(new IArgumentMatcher() {
            public boolean matches(Object obj) {
                if (!(obj instanceof WeightedOccurrenceArray)) {
                    return false;
                }
                WeightedOccurrence[] weightedOccurrences = ((WeightedOccurrenceArray) obj).getWeightedOccurrences();
                if (weightedOccurrences.length != collection.size()) {
                    return false;
                }
                for (WeightedOccurrence weightedOccurrence : weightedOccurrences) {
                    if (!collection.contains(weightedOccurrence)) {
                        return false;
                    }
                }
                return true;
            }

            public void appendTo(StringBuffer stringBuffer) {
                // Describe the expectation so a failed verification reports what was expected
                // instead of an empty argument description.
                stringBuffer.append("weightedOccurrenceArrayMatches(").append(collection).append(')');
            }
        });
        return null;
    }

    /**
     * Runs {@code RowSimilarityJob.CooccurrencesMapper} for column 12 with occurrences of rows
     * 34 and 56 and verifies, via mocks, that a {@code Cooccurrence} is written for each of the
     * pairs (34,34), (34,56) and (56,56) and that the COOCCURRENCES counter is incremented by 3.
     */
    @Test
    public void testCooccurrencesMapper() throws Exception {
        Mapper.Context mockContext = EasyMock.createMock(Mapper.Context.class);
        Counter cooccurrenceCounter = EasyMock.createMock(Counter.class);

        // Expected output: one entry per row pair, including each row paired with itself.
        mockContext.write(new WeightedRowPair(34, 34, 1.0d, 1.0d), new Cooccurrence(12, 0.5d, 0.5d));
        mockContext.write(new WeightedRowPair(34, 56, 1.0d, 3.0d), new Cooccurrence(12, 0.5d, 1.0d));
        mockContext.write(new WeightedRowPair(56, 56, 3.0d, 3.0d), new Cooccurrence(12, 1.0d, 1.0d));
        EasyMock.expect(mockContext.getCounter(RowSimilarityJob.Counter.COOCCURRENCES)).andReturn(cooccurrenceCounter);
        cooccurrenceCounter.increment(3L);
        EasyMock.replay(mockContext, cooccurrenceCounter);

        // Occurrences are supplied in ascending row order.
        WeightedOccurrence[] columnOccurrences = {
            new WeightedOccurrence(34, 0.5d, 1.0d),
            new WeightedOccurrence(56, 1.0d, 3.0d)
        };
        RowSimilarityJob.CooccurrencesMapper mapper = new RowSimilarityJob.CooccurrencesMapper();
        mapper.map(new VarIntWritable(12), new WeightedOccurrenceArray(columnOccurrences), mockContext);

        EasyMock.verify(mockContext, cooccurrenceCounter);
    }

    /**
     * Same scenario as {@code testCooccurrencesMapper}, but the occurrences for column 12 are
     * supplied in descending row order (56 before 34). The expected output is unchanged: the
     * row pairs written must still be (34,34), (34,56) and (56,56), and the COOCCURRENCES
     * counter must still be incremented by 3.
     */
    @Test
    public void testCooccurrencesMapperOrdering() throws Exception {
        Mapper.Context context = (Mapper.Context) EasyMock.createMock(Mapper.Context.class);
        Counter counter = (Counter) EasyMock.createMock(Counter.class);
        context.write(new WeightedRowPair(34, 34, 1.0d, 1.0d), new Cooccurrence(12, 0.5d, 0.5d));
        context.write(new WeightedRowPair(34, 56, 1.0d, 3.0d), new Cooccurrence(12, 0.5d, 1.0d));
        context.write(new WeightedRowPair(56, 56, 3.0d, 3.0d), new Cooccurrence(12, 1.0d, 1.0d));
        EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.COOCCURRENCES)).andReturn(counter);
        counter.increment(3L);
        EasyMock.replay(new Object[]{context, counter});
        // Input deliberately reversed relative to the expected pair ordering.
        new RowSimilarityJob.CooccurrencesMapper().map(new VarIntWritable(12), new WeightedOccurrenceArray(new WeightedOccurrence[]{new WeightedOccurrence(56, 1.0d, 3.0d), new WeightedOccurrence(34, 0.5d, 1.0d)}), context);
        EasyMock.verify(new Object[]{context, counter});
    }

    /**
     * Runs {@code RowSimilarityJob.SimilarityReducer} for the row pair (12, 34) and verifies,
     * via mocks, that the similarity 0.5 is written in both directions (12 to 34 and 34 to 12)
     * and that the SIMILAR_ROWS counter is incremented once.
     */
    @Test
    public void testSimilarityReducer() throws Exception {
        Reducer.Context mockContext = EasyMock.createMock(Reducer.Context.class);
        Counter similarRowsCounter = EasyMock.createMock(Counter.class);

        // The matcher calls stay inline so EasyMock records them in argument order.
        mockContext.write(EasyMock.eq(new SimilarityMatrixEntryKey(12, 0.5d)), MathHelper.matrixEntryMatches(12, 34, 0.5d));
        mockContext.write(EasyMock.eq(new SimilarityMatrixEntryKey(34, 0.5d)), MathHelper.matrixEntryMatches(34, 12, 0.5d));
        EasyMock.expect(mockContext.getCounter(RowSimilarityJob.Counter.SIMILAR_ROWS)).andReturn(similarRowsCounter);
        similarRowsCounter.increment(1L);
        EasyMock.replay(mockContext, similarRowsCounter);

        RowSimilarityJob.SimilarityReducer reducer = new RowSimilarityJob.SimilarityReducer();
        setField(reducer, "similarity", new DistributedTanimotoCoefficientVectorSimilarity());

        WeightedRowPair rowPair = new WeightedRowPair(12, 34, 3.0d, 3.0d);
        List<Cooccurrence> cooccurrences = Arrays.asList(new Cooccurrence(56, 1.0d, 2.0d), new Cooccurrence(78, 3.0d, 6.0d));
        reducer.reduce(rowPair, cooccurrences, mockContext);

        EasyMock.verify(mockContext, similarRowsCounter);
    }

    /**
     * Runs {@code RowSimilarityJob.SimilarityReducer} for a row paired with itself (90, 90) and
     * verifies, via mocks, that exactly one entry with similarity 1.0 is written and that the
     * SIMILAR_ROWS counter is incremented once.
     */
    @Test
    public void testSimilarityReducerSelfSimilarity() throws Exception {
        Reducer.Context mockContext = EasyMock.createMock(Reducer.Context.class);
        Counter similarRowsCounter = EasyMock.createMock(Counter.class);

        // Only a single write is recorded: the pair is symmetric with itself.
        mockContext.write(EasyMock.eq(new SimilarityMatrixEntryKey(90, 1.0d)), MathHelper.matrixEntryMatches(90, 90, 1.0d));
        EasyMock.expect(mockContext.getCounter(RowSimilarityJob.Counter.SIMILAR_ROWS)).andReturn(similarRowsCounter);
        similarRowsCounter.increment(1L);
        EasyMock.replay(mockContext, similarRowsCounter);

        RowSimilarityJob.SimilarityReducer reducer = new RowSimilarityJob.SimilarityReducer();
        setField(reducer, "similarity", new DistributedTanimotoCoefficientVectorSimilarity());

        WeightedRowPair selfPair = new WeightedRowPair(90, 90, 2.0d, 2.0d);
        List<Cooccurrence> cooccurrences = Arrays.asList(new Cooccurrence(56, 1.0d, 2.0d), new Cooccurrence(78, 3.0d, 6.0d));
        reducer.reduce(selfPair, cooccurrences, mockContext);

        EasyMock.verify(mockContext, similarRowsCounter);
    }

    /**
     * Runs {@code RowSimilarityJob.EntriesToVectorsReducer} with {@code maxSimilaritiesPerRow}
     * set to 1 and two entries for row 12 (0.8 for column 34, 0.7 for column 56), and verifies,
     * via a mocked context, that the vector written for row 12 contains only the 0.8 entry.
     */
    @Test
    public void testEntriesToVectorsReducer() throws Exception {
        Reducer.Context mockContext = EasyMock.createMock(Reducer.Context.class);
        mockContext.write(EasyMock.eq(new IntWritable(12)), MathHelper.vectorMatches(MathHelper.elem(34, 0.8d)));
        EasyMock.replay(mockContext);

        // The per-row limit is injected by field name because the reducer is not set up through a job here.
        RowSimilarityJob.EntriesToVectorsReducer reducer = new RowSimilarityJob.EntriesToVectorsReducer();
        setField(reducer, "maxSimilaritiesPerRow", 1);

        SimilarityMatrixEntryKey rowKey = new SimilarityMatrixEntryKey(12, 1.0d);
        reducer.reduce(rowKey, Arrays.asList(MathHelper.matrixEntry(12, 34, 0.8d), MathHelper.matrixEntry(12, 56, 0.7d)), mockContext);

        EasyMock.verify(mockContext);
    }

    /**
     * End-to-end run of {@code RowSimilarityJob} with the Tanimoto similarity on a 3-row,
     * 5-column matrix written to a temporary file. Reads the resulting 3x3 similarity matrix
     * back from {@code part-r-00000} and checks that the diagonal is 1, that rows 0 and 1 have
     * similarity 2/3 (they share two of three distinct non-zero columns), and that row 2 has
     * similarity 0 with both other rows.
     */
    @Test
    public void testSmallSampleMatrix() throws Exception {
        File testTempFile = getTestTempFile("rows");
        File testTempDir = getTestTempDir("output");
        // Presumably the job needs the output directory to not exist yet; the result of delete()
        // is deliberately not checked (best effort) - TODO confirm.
        testTempDir.delete();
        File testTempDir2 = getTestTempDir("tmp");
        Configuration configuration = new Configuration();
        Path path = new Path(testTempFile.getAbsolutePath());
        FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
        // Must be a double[][]: an array initializer of double[] rows is not a valid double[].
        double[][] rows = new double[][]{
            new double[]{1.0d, 0.0d, 1.0d, 1.0d, 0.0d},
            new double[]{0.0d, 0.0d, 1.0d, 1.0d, 0.0d},
            new double[]{0.0d, 0.0d, 0.0d, 0.0d, 1.0d}
        };
        MathHelper.writeEntries(rows, fileSystem, configuration, path);
        configuration.set("mapred.input.dir", testTempFile.getAbsolutePath());
        configuration.set("mapred.output.dir", testTempDir.getAbsolutePath());
        configuration.setBoolean("mapred.output.compress", false);
        RowSimilarityJob rowSimilarityJob = new RowSimilarityJob();
        rowSimilarityJob.setConf(configuration);
        // NOTE(review): "--numberOfColumns" is "3" although each input row above has 5 entries;
        // left unchanged because its effect inside the job is not visible here - confirm.
        rowSimilarityJob.run(new String[]{"--numberOfColumns", "3", "--similarityClassname", DistributedTanimotoCoefficientVectorSimilarity.class.getName(), "--tempDir", testTempDir2.getAbsolutePath()});
        Matrix readEntries = MathHelper.readEntries(configuration, new Path(testTempDir.getAbsolutePath(), "part-r-00000"), 3, 3);
        assertNotNull(readEntries);
        assertEquals(3L, readEntries.numCols());
        assertEquals(3L, readEntries.numRows());
        // Every row is fully similar to itself.
        assertEquals(1.0d, readEntries.get(0, 0), 1.0E-6d);
        assertEquals(1.0d, readEntries.get(1, 1), 1.0E-6d);
        assertEquals(1.0d, readEntries.get(2, 2), 1.0E-6d);
        // Row 2 shares no non-zero column with rows 0 and 1.
        assertEquals(0.0d, readEntries.get(2, 0), 1.0E-6d);
        assertEquals(0.0d, readEntries.get(2, 1), 1.0E-6d);
        assertEquals(0.0d, readEntries.get(0, 2), 1.0E-6d);
        assertEquals(0.0d, readEntries.get(1, 2), 1.0E-6d);
        // Rows 0 and 1: 2 shared columns out of 3 distinct ones.
        assertEquals(0.6666d, readEntries.get(0, 1), 1.0E-4d);
        assertEquals(0.6666d, readEntries.get(1, 0), 1.0E-4d);
    }

    /**
     * End-to-end run of {@code RowSimilarityJob} on a 3-row, 6-column matrix with
     * {@code --maxSimilaritiesPerRow 1} and a similarity that yields NaN for a row compared with
     * itself. Reads the resulting 3x3 matrix back from {@code part-r-00000} and checks that each
     * row keeps exactly one non-zero entry: 0.5 for rows 0 and 1 towards each other, and 0.4 for
     * row 2 towards row 0; all other entries, including the diagonal, are 0.
     */
    @Test
    public void testLimitEntriesInSimilarityMatrix() throws Exception {
        File testTempFile = getTestTempFile("rows");
        File testTempDir = getTestTempDir("output");
        // Presumably the job needs the output directory to not exist yet; the result of delete()
        // is deliberately not checked (best effort) - TODO confirm.
        testTempDir.delete();
        File testTempDir2 = getTestTempDir("tmp");
        Configuration configuration = new Configuration();
        Path path = new Path(testTempFile.getAbsolutePath());
        FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
        // Must be a double[][]: an array initializer of double[] rows is not a valid double[].
        double[][] rows = new double[][]{
            new double[]{1.0d, 0.0d, 1.0d, 1.0d, 0.0d, 1.0d},
            new double[]{0.0d, 1.0d, 1.0d, 1.0d, 1.0d, 1.0d},
            new double[]{1.0d, 1.0d, 0.0d, 1.0d, 0.0d, 0.0d}
        };
        MathHelper.writeEntries(rows, fileSystem, configuration, path);
        configuration.set("mapred.input.dir", testTempFile.getAbsolutePath());
        configuration.set("mapred.output.dir", testTempDir.getAbsolutePath());
        configuration.setBoolean("mapred.output.compress", false);
        RowSimilarityJob rowSimilarityJob = new RowSimilarityJob();
        rowSimilarityJob.setConf(configuration);
        // NOTE(review): "--numberOfColumns" is "3" although each input row above has 6 entries;
        // left unchanged because its effect inside the job is not visible here - confirm.
        rowSimilarityJob.run(new String[]{"--numberOfColumns", "3", "--maxSimilaritiesPerRow", "1", "--similarityClassname", DistributedTanimotoCoefficientExcludeIdentityVectorSimilarity.class.getName(), "--tempDir", testTempDir2.getAbsolutePath()});
        Matrix readEntries = MathHelper.readEntries(configuration, new Path(testTempDir.getAbsolutePath(), "part-r-00000"), 3, 3);
        assertNotNull(readEntries);
        assertEquals(3L, readEntries.numCols());
        assertEquals(3L, readEntries.numRows());
        // Row 0: only its similarity to row 1 is retained.
        assertEquals(0.0d, readEntries.get(0, 0), 1.0E-6d);
        assertEquals(0.5d, readEntries.get(0, 1), 1.0E-6d);
        assertEquals(0.0d, readEntries.get(0, 2), 1.0E-6d);
        // Row 1: only its similarity to row 0 is retained.
        assertEquals(0.5d, readEntries.get(1, 0), 1.0E-6d);
        assertEquals(0.0d, readEntries.get(1, 1), 1.0E-6d);
        assertEquals(0.0d, readEntries.get(1, 2), 1.0E-6d);
        // Row 2: only its similarity to row 0 is retained.
        assertEquals(0.4d, readEntries.get(2, 0), 1.0E-6d);
        assertEquals(0.0d, readEntries.get(2, 1), 1.0E-6d);
        assertEquals(0.0d, readEntries.get(2, 2), 1.0E-6d);
    }
}
