package org.apache.mahout.clustering.fuzzykmeans;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.clustering.kmeans.TestKmeansClustering;
import org.apache.mahout.common.DummyOutputCollector;
import org.apache.mahout.common.DummyReporter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

/* loaded from: input_file:org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.class */
public class TestFuzzyKmeansClustering extends MahoutTestCase {
    private FileSystem fs;

    /**
     * Recursively deletes the file or directory at the given local path.
     *
     * <p>Does nothing when the path does not exist. Deletion is best-effort:
     * the result of {@link File#delete()} is not checked.
     *
     * @param str path of the file or directory to remove
     */
    private static void rmr(String str) {
        File file = new File(str);
        if (!file.exists()) {
            return;
        }
        if (file.isDirectory()) {
            // File.list() returns null when the directory cannot be read (I/O error,
            // or it vanished between the isDirectory() check and this call).
            String[] children = file.list();
            if (children != null) {
                for (String child : children) {
                    rmr(file.toString() + File.separator + child);
                }
            }
        }
        file.delete();
    }

    /**
     * Prepares each test: runs the parent set-up, removes the local "output" and
     * "testdata" trees left by earlier runs, and obtains the default file system.
     *
     * @throws Exception if the parent set-up or the file system lookup fails
     */
    @Override // org.apache.mahout.common.MahoutTestCase
    public void setUp() throws Exception {
        super.setUp();
        for (String staleDir : new String[] {"output", "testdata"}) {
            rmr(staleDir);
        }
        fs = FileSystem.get(new Configuration());
    }

    /**
     * Rounds a value to the given number of decimal places.
     *
     * <p>The value is scaled by 10^i, rounded to the nearest {@code long} with
     * {@link Math#round(double)}, and scaled back with floating-point division so
     * the fractional part of the result is kept.
     *
     * @param d the value to round
     * @param i the number of decimal places to keep
     * @return {@code d} rounded to {@code i} decimal places
     */
    private static double round(double d, int i) {
        long factor = (long) Math.pow(10.0d, i);
        // Cast before dividing: long / long would truncate the fraction.
        return (double) Math.round(d * factor) / factor;
    }

    /**
     * Returns the result of {@code vector.plus(0.1)}; used to derive initial
     * cluster centers that are offset from the sample points.
     *
     * @param vector the point to offset
     * @return whatever {@code Vector.plus(double)} returns for an offset of 0.1
     *         (presumably a new vector with 0.1 added to each element - confirm
     *         against the Vector API)
     */
    private static Vector tweakValue(Vector vector) {
        final double offset = 0.1d;
        return vector.plus(offset);
    }

    /**
     * Builds, for every point, a textual summary of its membership weight in each
     * cluster and stores it in {@code map}.
     *
     * <p>The map key is the vector's name, or its trimmed format string when the
     * name is null or empty. The value has the form {@code [id:weight id:weight ...]},
     * with one entry per cluster in the order of {@code list2}.
     *
     * @param list                 the points to describe
     * @param list2                the clusters whose centers are measured against
     * @param fuzzyKMeansClusterer supplies the distance measure and the weight computation
     * @param map                  receives one entry per point
     */
    private static void computeCluster(List<Vector> list, List<SoftCluster> list2, FuzzyKMeansClusterer fuzzyKMeansClusterer, Map<String, String> map) {
        for (Vector point : list) {
            StringBuilder memberships = new StringBuilder("[");

            // Distance from this point to every cluster center, in cluster order.
            List<Double> distances = new ArrayList<Double>();
            for (SoftCluster cluster : list2) {
                distances.add(Double.valueOf(fuzzyKMeansClusterer.getMeasure().distance(point, cluster.getCenter())));
            }

            for (int c = 0; c < list2.size(); c++) {
                memberships.append(list2.get(c).getId())
                        .append(':')
                        .append(fuzzyKMeansClusterer.computeProbWeight(distances.get(c).doubleValue(), distances))
                        .append(' ');
            }

            String key = point.getName();
            if (key == null || key.length() == 0) {
                key = point.asFormatString().trim();
            }
            map.put(key, memberships.toString().trim() + ']');
        }
    }

    /**
     * Runs the in-memory reference clusterer for k = 1..N clusters (N = number of
     * sample points) and checks that every point reports one membership value per
     * cluster and that those values sum to 1.0 after rounding to one decimal place.
     *
     * @throws Exception if clustering fails
     */
    public void testReferenceImplementation() throws Exception {
        List<Vector> points = TestKmeansClustering.getPoints(TestKmeansClustering.reference);
        for (int k = 0; k < points.size(); k++) {
            System.out.println("test k= " + k);

            // Seed k + 1 clusters from the first k + 1 points, each slightly offset.
            List<SoftCluster> initialClusters = new ArrayList<SoftCluster>();
            for (int c = 0; c < k + 1; c++) {
                SoftCluster cluster = new SoftCluster(tweakValue(points.get(c)));
                // Register the center itself as a point with weight 1.
                cluster.addPoint(cluster.getCenter(), 1.0d);
                initialClusters.add(cluster);
            }

            Map<String, String> pointClusterInfo = new HashMap<String, String>();
            List<List<SoftCluster>> history = FuzzyKMeansClusterer.clusterPoints(points, initialClusters, new EuclideanDistanceMeasure(), 0.001d, 2.0d, 2);
            // Only the last entry of the returned list is evaluated.
            computeCluster(points, history.get(history.size() - 1), new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(), 0.001d, 2.0d), pointClusterInfo);

            for (String info : pointClusterInfo.values()) {
                // Strip the surrounding brackets, then split into "id:weight" tokens.
                String[] memberships = info.substring(1, info.length() - 1).split(" ");
                assertEquals("Number of clusters", k + 1, memberships.length);
                double total = 0.0d;
                for (String membership : memberships) {
                    total += Double.parseDouble(membership.split(":")[1]);
                }
                assertEquals("Sum of cluster Membership problability should be equal to=", Double.valueOf(1.0d), Double.valueOf(round(total, 1)));
            }
        }
    }

    /**
     * Runs the fuzzy k-means driver end to end for k = 1..N clusters (N = number of
     * sample points) against local "testdata" / "output" directories and checks
     * that the membership probabilities written for each point sum to 1.0 after
     * rounding to one decimal place.
     *
     * @throws Exception if file system access or the job itself fails
     */
    public void testFuzzyKMeansMRJob() throws Exception {
        List<VectorWritable> pointsWritable = TestKmeansClustering.getPointsWritable(TestKmeansClustering.reference);

        File testData = new File("testdata");
        if (!testData.exists()) {
            testData.mkdir();
        }
        File pointsDir = new File("testdata/points");
        if (!pointsDir.exists()) {
            pointsDir.mkdir();
        }

        Configuration configuration = new Configuration();
        ClusteringTestUtils.writePointsToFile(pointsWritable, "testdata/points/file1", this.fs, configuration);

        for (int k = 0; k < pointsWritable.size(); k++) {
            System.out.println("testKFuzzyKMeansMRJob k= " + k);

            // Start each round from an empty clusters directory.
            JobConf jobConf = new JobConf(FuzzyKMeansDriver.class);
            Path clustersPath = new Path("testdata/clusters");
            FileSystem clustersFs = FileSystem.get(clustersPath.toUri(), jobConf);
            if (clustersFs.exists(clustersPath)) {
                clustersFs.delete(clustersPath, true);
            }
            File clustersDir = new File("testdata/clusters");
            if (!clustersDir.exists()) {
                clustersDir.mkdir();
            }

            // Write the k + 1 initial clusters; close the writer even if an append fails.
            SequenceFile.Writer writer = new SequenceFile.Writer(clustersFs, configuration, new Path("testdata/clusters/part-00000"), Text.class, SoftCluster.class);
            try {
                for (int c = 0; c < k + 1; c++) {
                    SoftCluster cluster = new SoftCluster(tweakValue(pointsWritable.get(c).get()));
                    // Register the center itself as a point with weight 1.
                    cluster.addPoint(cluster.getCenter(), 1.0d);
                    writer.append(new Text(cluster.getIdentifier()), cluster);
                }
            } finally {
                writer.close();
            }

            // Recreate an empty output directory for the job.
            Path outputPath = new Path("output");
            FileSystem outputFs = FileSystem.get(outputPath.toUri(), configuration);
            if (outputFs.exists(outputPath)) {
                outputFs.delete(outputPath, true);
            }
            outputFs.mkdirs(outputPath);

            FuzzyKMeansDriver.runJob("testdata/points", "testdata/clusters", "output", EuclideanDistanceMeasure.class.getName(), 0.001d, 2, 1, k + 1, 2.0f);

            File outputPoints = new File("output/points");
            assertTrue("output dir exists?", outputPoints.exists());

            // Close the reader even when an assertion fails part-way through the file.
            SequenceFile.Reader reader = new SequenceFile.Reader(outputFs, new Path("output/points/part-00000"), configuration);
            try {
                Text key = new Text();
                FuzzyKMeansOutput out = new FuzzyKMeansOutput();
                while (reader.next(key, out)) {
                    double total = 0.0d;
                    for (double probability : out.getProbabilities()) {
                        total += probability;
                    }
                    assertEquals("Sum of cluster Membership probability should be equal to=", Double.valueOf(1.0d), Double.valueOf(round(total, 1)));
                }
            } finally {
                reader.close();
            }
        }
    }

    /**
     * Feeds every sample point through {@link FuzzyKMeansMapper} for k = 1..N
     * clusters (N = number of sample points) and checks that the mapper emits one
     * key per cluster and that the probabilities emitted for each point sum to 1.0
     * after rounding to one decimal place.
     *
     * @throws Exception if the mapper fails
     */
    public void testFuzzyKMeansMapper() throws Exception {
        List<VectorWritable> pointsWritable = TestKmeansClustering.getPointsWritable(TestKmeansClustering.reference);
        for (int k = 0; k < pointsWritable.size(); k++) {
            System.out.println("testKFuzzyKMeansMRJob k= " + k);

            // Seed k + 1 clusters from the first k + 1 points, each slightly offset.
            List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
            for (int c = 0; c < k + 1; c++) {
                SoftCluster cluster = new SoftCluster(tweakValue(pointsWritable.get(c).get()), c);
                // Register the center itself as a point with weight 1.
                cluster.addPoint(cluster.getCenter(), 1.0d);
                clusterList.add(cluster);
            }

            FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
            mapper.config(clusterList);
            JobConf conf = new JobConf();
            conf.set("org.apache.mahout.clustering.kmeans.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
            conf.set("org.apache.mahout.clustering.kmeans.convergence", "0.001");
            conf.set("org.apache.mahout.clustering.fuzzykmeans.m", "2");
            mapper.configure(conf);

            DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
            for (VectorWritable point : pointsWritable) {
                mapper.map(new Text(), point, mapCollector, null);
            }
            assertEquals("Mapper Keys", k + 1, mapCollector.getData().size());

            // Accumulate, per point, the probability emitted under every cluster key.
            Map<Vector, Double> totalProbByPoint = new HashMap<Vector, Double>();
            for (String key : mapCollector.getKeys()) {
                for (FuzzyKMeansInfo info : mapCollector.getValue(key)) {
                    Double soFar = totalProbByPoint.get(info.getVector());
                    double total = soFar == null ? 0.0d : soFar.doubleValue();
                    totalProbByPoint.put(info.getVector(), Double.valueOf(total + info.getProbability()));
                }
            }

            for (Map.Entry<Vector, Double> entry : totalProbByPoint.entrySet()) {
                assertEquals("total Prob for Point:" + entry.getKey(), Double.valueOf(1.0d), Double.valueOf(round(entry.getValue().doubleValue(), 1)));
            }
        }
    }

    /**
     * Runs the mapper followed by {@link FuzzyKMeansCombiner} for k = 1..N clusters
     * (N = number of sample points) and checks that the combiner emits one key per
     * cluster with exactly one value under each key.
     *
     * @throws Exception if the mapper or combiner fails
     */
    public void testFuzzyKMeansCombiner() throws Exception {
        List<VectorWritable> pointsWritable = TestKmeansClustering.getPointsWritable(TestKmeansClustering.reference);
        for (int k = 0; k < pointsWritable.size(); k++) {
            System.out.println("testKFuzzyKMeansMRJob k= " + k);

            // Seed k + 1 clusters from the first k + 1 points, each slightly offset.
            List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
            for (int c = 0; c < k + 1; c++) {
                SoftCluster cluster = new SoftCluster(tweakValue(pointsWritable.get(c).get()), c);
                // Register the center itself as a point with weight 1.
                cluster.addPoint(cluster.getCenter(), 1.0d);
                clusterList.add(cluster);
            }

            FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
            mapper.config(clusterList);
            JobConf conf = new JobConf();
            conf.set("org.apache.mahout.clustering.kmeans.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
            conf.set("org.apache.mahout.clustering.kmeans.convergence", "0.001");
            conf.set("org.apache.mahout.clustering.fuzzykmeans.m", "2");
            mapper.configure(conf);

            DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
            for (VectorWritable point : pointsWritable) {
                mapper.map(new Text(), point, mapCollector, null);
            }

            // Combine everything the mapper emitted, one call per mapper key.
            DummyOutputCollector<Text, FuzzyKMeansInfo> combinerCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
            FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
            combiner.configure(conf);
            for (String key : mapCollector.getKeys()) {
                combiner.reduce(new Text(key), mapCollector.getValue(key).iterator(), combinerCollector, null);
            }

            assertEquals("Combiner Output", k + 1, combinerCollector.getData().size());
            for (String key : combinerCollector.getKeys()) {
                assertEquals("too many values", 1, combinerCollector.getValue(key).size());
            }
        }
    }

    /**
     * Runs mapper, combiner and {@link FuzzyKMeansReducer} for k = 1..N clusters
     * (N = number of sample points) and compares each reduced cluster's center
     * with the center produced by one iteration of the in-memory reference
     * implementation started from the same initial clusters.
     *
     * @throws Exception if any stage fails
     */
    public void testFuzzyKMeansReducer() throws Exception {
        List<VectorWritable> pointsWritable = TestKmeansClustering.getPointsWritable(TestKmeansClustering.reference);
        for (int k = 0; k < pointsWritable.size(); k++) {
            System.out.println("testKFuzzyKMeansMRJob k= " + k);

            // Seed k + 1 clusters from the first k + 1 points, each slightly offset.
            List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
            for (int c = 0; c < k + 1; c++) {
                clusterList.add(new SoftCluster(tweakValue(pointsWritable.get(c).get()), c));
            }

            FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
            mapper.config(clusterList);
            JobConf conf = new JobConf();
            conf.set("org.apache.mahout.clustering.kmeans.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
            conf.set("org.apache.mahout.clustering.kmeans.convergence", "0.001");
            conf.set("org.apache.mahout.clustering.fuzzykmeans.m", "2");
            mapper.configure(conf);

            DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
            for (VectorWritable point : pointsWritable) {
                mapper.map(new Text(), point, mapCollector, null);
            }

            DummyOutputCollector<Text, FuzzyKMeansInfo> combinerCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
            FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
            combiner.configure(conf);
            for (String key : mapCollector.getKeys()) {
                combiner.reduce(new Text(key), mapCollector.getValue(key).iterator(), combinerCollector, null);
            }

            DummyOutputCollector<Text, SoftCluster> reducerCollector = new DummyOutputCollector<Text, SoftCluster>();
            FuzzyKMeansReducer reducer = new FuzzyKMeansReducer();
            reducer.config(clusterList);
            reducer.configure(conf);
            for (String key : combinerCollector.getKeys()) {
                reducer.reduce(new Text(key), combinerCollector.getValue(key).iterator(), reducerCollector, new DummyReporter());
            }

            // Check the reducer's own output (not the combiner's, which a
            // "Reducer Output" assertion would otherwise never exercise).
            assertEquals("Reducer Output", k + 1, reducerCollector.getData().size());

            // Build the expected result: same initial clusters, one reference iteration.
            List<SoftCluster> reference = new ArrayList<SoftCluster>();
            for (int c = 0; c < k + 1; c++) {
                reference.add(new SoftCluster(tweakValue(pointsWritable.get(c).get()), c));
            }
            List<Vector> pointsVectors = new ArrayList<Vector>();
            for (VectorWritable point : pointsWritable) {
                pointsVectors.add(point.get());
            }
            FuzzyKMeansClusterer.runFuzzyKMeansIteration(pointsVectors, reference, new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(), 0.001d, 2.0d));

            for (SoftCluster expected : reference) {
                SoftCluster actual = reducerCollector.getValue(expected.getIdentifier()).get(0);
                System.out.println("ref= " + expected.toString() + " cluster= " + actual.toString());
                actual.recomputeCenter();
                assertEquals("key center: " + expected.getCenter().asFormatString() + " does not equal cluster: " + actual.getCenter().asFormatString(), expected.getCenter(), actual.getCenter());
            }
        }
    }

    /**
     * Runs mapper, combiner and reducer, then feeds the reduced clusters to
     * {@link FuzzyKMeansClusterMapper} for k = 1..N clusters (N = number of sample
     * points). The per-point membership probabilities it emits are compared with
     * those computed by one iteration of the in-memory reference implementation
     * started from the same initial clusters.
     *
     * @throws Exception if any stage fails
     */
    public void testFuzzyKMeansClusterMapper() throws Exception {
        List<VectorWritable> pointsWritable = TestKmeansClustering.getPointsWritable(TestKmeansClustering.reference);
        for (int k = 0; k < pointsWritable.size(); k++) {
            System.out.println("testKFuzzyKMeansMRJob k= " + k);

            // Seed k + 1 clusters from the first k + 1 points, each slightly offset.
            List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
            for (int c = 0; c < k + 1; c++) {
                SoftCluster cluster = new SoftCluster(tweakValue(pointsWritable.get(c).get()), c);
                // Register the center itself as a point with weight 1.
                cluster.addPoint(cluster.getCenter(), 1.0d);
                clusterList.add(cluster);
            }

            FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
            mapper.config(clusterList);
            JobConf conf = new JobConf();
            conf.set("org.apache.mahout.clustering.kmeans.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
            conf.set("org.apache.mahout.clustering.kmeans.convergence", "0.001");
            conf.set("org.apache.mahout.clustering.fuzzykmeans.m", "2");
            mapper.configure(conf);

            DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
            for (VectorWritable point : pointsWritable) {
                mapper.map(new Text(), point, mapCollector, null);
            }
            for (SoftCluster cluster : clusterList) {
                cluster.recomputeCenter();
            }

            DummyOutputCollector<Text, FuzzyKMeansInfo> combinerCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
            FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
            combiner.configure(conf);
            for (String key : mapCollector.getKeys()) {
                combiner.reduce(new Text(key), mapCollector.getValue(key).iterator(), combinerCollector, null);
            }

            DummyOutputCollector<Text, SoftCluster> reducerCollector = new DummyOutputCollector<Text, SoftCluster>();
            FuzzyKMeansReducer reducer = new FuzzyKMeansReducer();
            reducer.config(clusterList);
            reducer.configure(conf);
            for (String key : combinerCollector.getKeys()) {
                reducer.reduce(new Text(key), combinerCollector.getValue(key).iterator(), reducerCollector, null);
            }

            // Take the first cluster emitted under each reducer key and finalize its center.
            List<SoftCluster> reducedClusters = new ArrayList<SoftCluster>();
            for (String key : reducerCollector.getKeys()) {
                reducedClusters.add(reducerCollector.getValue(key).get(0));
            }
            for (SoftCluster cluster : reducedClusters) {
                cluster.recomputeCenter();
            }

            DummyOutputCollector<Text, FuzzyKMeansOutput> clusterMapperCollector = new DummyOutputCollector<Text, FuzzyKMeansOutput>();
            FuzzyKMeansClusterMapper clusterMapper = new FuzzyKMeansClusterMapper();
            clusterMapper.config(reducedClusters);
            clusterMapper.configure(conf);
            for (VectorWritable point : pointsWritable) {
                clusterMapper.map(new Text(), point, clusterMapperCollector, null);
            }

            // Build the expected memberships with the reference implementation.
            List<SoftCluster> reference = new ArrayList<SoftCluster>();
            for (int c = 0; c < k + 1; c++) {
                reference.add(new SoftCluster(tweakValue(pointsWritable.get(c).get()), c));
            }
            Map<String, String> pointClusterInfo = new HashMap<String, String>();
            List<Vector> pointsVectors = new ArrayList<Vector>();
            for (VectorWritable point : pointsWritable) {
                pointsVectors.add(point.get());
            }
            List<List<SoftCluster>> history = FuzzyKMeansClusterer.clusterPoints(pointsVectors, reference, new EuclideanDistanceMeasure(), 0.001d, 2.0d, 1);
            computeCluster(pointsVectors, history.get(history.size() - 1), new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(), 0.001d, 2.0d), pointClusterInfo);

            for (String pointKey : clusterMapperCollector.getKeys()) {
                List<FuzzyKMeansOutput> outputs = clusterMapperCollector.getValue(pointKey);

                // Strip the surrounding brackets, then split into "id:weight" tokens.
                String expectedInfo = pointClusterInfo.get(pointKey);
                String[] memberships = expectedInfo.substring(1, expectedInfo.length() - 1).split(" ");
                assertEquals("Number of clusters", k + 1, memberships.length);

                Map<String, Double> expectedProbById = new HashMap<String, Double>();
                for (String membership : memberships) {
                    String[] idAndProb = membership.split(":");
                    expectedProbById.put(idAndProb[0], Double.valueOf(Double.parseDouble(idAndProb[1])));
                }

                FuzzyKMeansOutput output = outputs.get(0);
                SoftCluster[] clusters = output.getClusters();
                double[] probabilities = output.getProbabilities();
                assertEquals("Number of clusters", k + 1, clusters.length);

                for (String membership : memberships) {
                    String[] idAndProb = membership.split(":");
                    System.out.println(k + " point:" + pointKey + ": Cluster: " + idAndProb[0] + " prob: " + Double.parseDouble(idAndProb[1]));
                }

                for (int c = 0; c < clusters.length; c++) {
                    Double expected = expectedProbById.get(String.valueOf(clusters[c].getId()));
                    assertEquals(k + " point: " + pointKey + ": expected probability: " + expected + " was: " + probabilities[c], expected, Double.valueOf(probabilities[c]));
                }
            }
        }
    }
}
