package org.apache.mahout.clustering.topdown.postprocessor;

import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.topdown.PathDirectory;
import org.apache.mahout.common.DummyOutputCollector;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/* loaded from: input_file:org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorTest.class */
/**
 * Exercises {@link ClusterOutputPostProcessor} end to end: the reference points are clustered
 * with canopy clustering, the clustering output is post-processed into one directory per
 * top-level cluster, and each of those directories is then clustered again.
 */
public final class ClusterOutputPostProcessorTest extends MahoutTestCase {

    /** Input points: three points around (1,1) and four points around (4,4). */
    private static final double[][] REFERENCE = {
        {1.0d, 1.0d}, {2.0d, 1.0d}, {1.0d, 2.0d},
        {4.0d, 4.0d}, {5.0d, 4.0d}, {4.0d, 5.0d}, {5.0d, 5.0d}
    };

    /** Formatted vectors that may appear in the post-processed cluster keyed "0". */
    private static final String[] FIRST_CLUSTER_POINTS = {
        "{0:1.0,1:1.0}", "{0:2.0,1:1.0}", "{0:1.0,1:2.0}"
    };

    /** Formatted vectors that may appear in the post-processed cluster keyed "1". */
    private static final String[] SECOND_CLUSTER_POINTS = {
        "{0:4.0,1:4.0}", "{0:5.0,1:4.0}", "{0:4.0,1:5.0}", "{0:5.0,1:5.0}"
    };

    private FileSystem fs;
    private Path outputPath;
    private Configuration conf;

    /**
     * Creates the configuration shared by the whole test and the file system derived from it,
     * so that every helper reads and writes through the same settings.
     */
    @Override // org.apache.mahout.common.MahoutTestCase
    @Before
    public void setUp() throws Exception {
        super.setUp();
        this.conf = new Configuration();
        this.fs = FileSystem.get(this.conf);
    }

    /**
     * Wraps each row of {@code raw} in a {@link VectorWritable} backed by a
     * {@link RandomAccessSparseVector}.
     *
     * @param raw one row per point
     * @return the points, in the order of the rows
     */
    private static List<VectorWritable> getPointsWritable(double[][] raw) {
        List<VectorWritable> points = Lists.newArrayList();
        for (double[] row : raw) {
            Vector vector = new RandomAccessSparseVector(row.length);
            vector.assign(row);
            points.add(new VectorWritable(vector));
        }
        return points;
    }

    /**
     * Writes the reference points to two input files, runs the top-level clustering,
     * post-processes its output, verifies the contents of the post-processed clusters and
     * finally clusters each of them again.
     */
    @Test
    public void testTopDownClustering() throws Exception {
        List<VectorWritable> points = getPointsWritable(REFERENCE);
        Path pointsPath = getTestTempDirPath("points");
        ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file1"), this.fs, this.conf);
        ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file2"), this.fs, this.conf);
        this.outputPath = getTestTempDirPath("output");

        topLevelClustering(pointsPath, this.conf);
        Map<String, Path> postProcessedClusters = outputPostProcessing(this.conf);
        assertPostProcessedOutput(postProcessedClusters);
        bottomLevelClustering(postProcessedClusters);
    }

    /**
     * Checks the points of one post-processed cluster. Only the keys "0" and "1" are verified;
     * any other key is left unchecked.
     *
     * @param cluster cluster key mapped to the directory holding that cluster's points
     * @throws IOException if the cluster files cannot be read; propagated so the test report
     *     shows the real cause instead of a generic failure message
     */
    private void assertTopLevelCluster(Map.Entry<String, Path> cluster) throws IOException {
        String clusterId = cluster.getKey();
        Path clusterPath = cluster.getValue();
        if ("0".equals(clusterId)) {
            assertPointsInFirstTopLevelCluster(clusterPath);
        } else if ("1".equals(clusterId)) {
            assertPointsInSecondTopLevelCluster(clusterPath);
        }
    }

    /** Asserts that every vector stored under {@code path} is one of the points near (1,1). */
    private void assertPointsInFirstTopLevelCluster(Path path) throws IOException {
        assertVectorsAmong(FIRST_CLUSTER_POINTS, path);
    }

    /** Asserts that every vector stored under {@code path} is one of the points near (4,4). */
    private void assertPointsInSecondTopLevelCluster(Path path) throws IOException {
        assertVectorsAmong(SECOND_CLUSTER_POINTS, path);
    }

    /** Asserts that the formatted form of every vector under {@code path} is in {@code expected}. */
    private void assertVectorsAmong(String[] expected, Path path) throws IOException {
        for (Vector vector : getVectorsInCluster(path)) {
            String formatted = vector.asFormatString();
            Assert.assertTrue("Unexpected vector " + formatted + " in " + path,
                ArrayUtils.contains(expected, formatted));
        }
    }

    /**
     * Reads every vector from the sequence files listed under the paths matching {@code path}.
     *
     * @param path directory (or glob) holding sequence files of LongWritable/VectorWritable pairs
     * @return all vectors found, in file order
     * @throws IOException if listing or reading a file fails
     */
    private List<Vector> getVectorsInCluster(Path path) throws IOException {
        FileStatus[] files = this.fs.listStatus(FileUtil.stat2Paths(this.fs.globStatus(path)));
        List<Vector> vectors = Lists.newArrayList();
        for (FileStatus file : files) {
            SequenceFile.Reader reader = new SequenceFile.Reader(this.fs, file.getPath(), this.conf);
            try {
                LongWritable key = new LongWritable();
                VectorWritable value = new VectorWritable();
                while (reader.next(key, value)) {
                    vectors.add(value.get());
                }
            } finally {
                // Release the underlying stream even when reading fails part-way through.
                reader.close();
            }
        }
        return vectors;
    }

    /**
     * Runs canopy clustering (thresholds 2.1 and 2.0) on each post-processed cluster directory
     * and verifies the resulting bottom-level output.
     *
     * @param map cluster key mapped to the directory holding that cluster's points
     */
    private void bottomLevelClustering(Map<String, Path> map) throws IOException, InterruptedException, ClassNotFoundException {
        for (Map.Entry<String, Path> cluster : map.entrySet()) {
            Path bottomLevelClusterPath = PathDirectory.getBottomLevelClusterPath(this.outputPath, cluster.getKey());
            CanopyDriver.run(this.conf, cluster.getValue(), bottomLevelClusterPath, new ManhattanDistanceMeasure(), 2.1d, 2.0d, true, 0.0d, true);
            assertBottomLevelCluster(bottomLevelClusterPath);
        }
    }

    /**
     * Asserts that the clustered points written below {@code path} are spread over either one
     * or two distinct keys.
     */
    private void assertBottomLevelCluster(Path path) {
        Path clusteredPoints = new Path(path, "clusteredPoints");
        DummyOutputCollector<WritableComparable<?>, Writable> collector =
            new DummyOutputCollector<WritableComparable<?>, Writable>();
        // Group the records by key; only the number of distinct keys is checked afterwards.
        for (Pair<WritableComparable<?>, Writable> record
                : new SequenceFileIterable<WritableComparable<?>, Writable>(new Path(clusteredPoints, "part-m-0"), this.conf)) {
            collector.collect(record.getFirst(), record.getSecond());
        }
        int clusterCount = collector.getKeys().size();
        Assert.assertTrue("Expected 1 or 2 bottom level clusters but found " + clusterCount,
            clusterCount == 1 || clusterCount == 2);
    }

    /** Verifies every post-processed top-level cluster in {@code map}. */
    private void assertPostProcessedOutput(Map<String, Path> map) throws IOException {
        for (Map.Entry<String, Path> cluster : map.entrySet()) {
            assertTopLevelCluster(cluster);
        }
    }

    /**
     * Runs the post processor with the clustering output directory as both of its path
     * arguments.
     *
     * @return the post-processed cluster directories reported by the processor
     */
    private Map<String, Path> outputPostProcessing(Configuration configuration) throws IOException {
        ClusterOutputPostProcessor postProcessor = new ClusterOutputPostProcessor(this.outputPath, this.outputPath, configuration);
        postProcessor.process();
        return postProcessor.getPostProcessedClusterDirectories();
    }

    /** Runs canopy clustering (thresholds 3.1 and 2.1) on {@code path}, writing to the output directory. */
    private void topLevelClustering(Path path, Configuration configuration) throws IOException, InterruptedException, ClassNotFoundException {
        CanopyDriver.run(configuration, path, this.outputPath, new ManhattanDistanceMeasure(), 3.1d, 2.1d, true, 0.0d, true);
    }
}
