package org.apache.mahout.clustering.canopy;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.common.DummyOutputCollector;
import org.apache.mahout.common.DummyReporter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
import org.apache.mahout.common.distance.UserDefinedDistanceMeasure;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

/* loaded from: input_file:org/apache/mahout/clustering/canopy/TestCanopyCreation.class */
public class TestCanopyCreation extends MahoutTestCase {
    // Nine two-dimensional sample points used as input by every test in this class.
    private static final double[][] raw = {new double[]{1.0d, 1.0d}, new double[]{2.0d, 1.0d}, new double[]{1.0d, 2.0d}, new double[]{2.0d, 2.0d}, new double[]{3.0d, 3.0d}, new double[]{4.0d, 4.0d}, new double[]{5.0d, 4.0d}, new double[]{4.0d, 5.0d}, new double[]{5.0d, 5.0d}};
    // Canopies built in setUp() from raw with the Manhattan measure and thresholds 3.1 / 2.1.
    private List<Canopy> referenceManhattan;
    // Centroids calculated in setUp() from referenceManhattan.
    private List<Vector> manhattanCentroids;
    // Canopies built in setUp() from raw with the Euclidean measure and thresholds 3.1 / 2.1.
    private List<Canopy> referenceEuclidean;
    // Centroids calculated in setUp() from referenceEuclidean.
    private List<Vector> euclideanCentroids;
    // File system obtained in setUp() from a default Configuration.
    private FileSystem fs;
    // Distance measures used to build the two reference canopy lists.
    private final DistanceMeasure manhattanDistanceMeasure = new ManhattanDistanceMeasure();
    private final DistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();

    /**
     * Converts each row of the given array into a {@link VectorWritable}.
     *
     * <p>Every row becomes a {@link RandomAccessSparseVector} that is named after
     * its zero-based row index and sized to the row length.
     *
     * @param dArr rows of coordinates, one point per row
     * @return the wrapped points, in row order
     */
    private static List<VectorWritable> getPointsWritable(double[][] dArr) {
        // Typed, presized list instead of a raw ArrayList (avoids the unchecked conversion on return).
        List<VectorWritable> points = new ArrayList<VectorWritable>(dArr.length);
        for (int row = 0; row < dArr.length; row++) {
            double[] coordinates = dArr[row];
            RandomAccessSparseVector point = new RandomAccessSparseVector(String.valueOf(row), coordinates.length);
            point.assign(coordinates);
            points.add(new VectorWritable(point));
        }
        return points;
    }

    /**
     * Converts each row of the given array into a {@link Vector}.
     *
     * <p>Every row becomes a {@link RandomAccessSparseVector} that is named after
     * its zero-based row index and sized to the row length.
     *
     * @param dArr rows of coordinates, one point per row
     * @return the points, in row order
     */
    private static List<Vector> getPoints(double[][] dArr) {
        // Typed, presized list instead of a raw ArrayList (avoids the unchecked conversion on return).
        List<Vector> points = new ArrayList<Vector>(dArr.length);
        for (int row = 0; row < dArr.length; row++) {
            double[] coordinates = dArr[row];
            RandomAccessSparseVector point = new RandomAccessSparseVector(String.valueOf(row), coordinates.length);
            point.assign(coordinates);
            points.add(point);
        }
        return points;
    }

    /**
     * Checks the given canopies against the Manhattan reference canopies.
     *
     * @param list canopies under test
     */
    private void verifyManhattanCanopies(List<Canopy> list) {
        List<Canopy> expected = this.referenceManhattan;
        verifyCanopies(list, expected);
    }

    /**
     * Checks the given canopies against the Euclidean reference canopies.
     *
     * @param list canopies under test
     */
    private void verifyEuclideanCanopies(List<Canopy> list) {
        List<Canopy> expected = this.referenceEuclidean;
        verifyCanopies(list, expected);
    }

    /**
     * Asserts that two canopy lists agree in size and, position by position,
     * in point count and in every centroid component.
     *
     * @param list  canopies under test
     * @param list2 reference canopies
     */
    private static void verifyCanopies(List<Canopy> list, List<Canopy> list2) {
        assertEquals("number of canopies", list2.size(), list.size());
        int canopyIx = 0;
        for (Canopy actual : list) {
            Canopy expected = list2.get(canopyIx);
            assertEquals("canopy points " + canopyIx, expected.getNumPoints(), actual.getNumPoints());
            Vector expectedCentroid = expected.computeCentroid();
            Vector actualCentroid = actual.computeCentroid();
            int dimensions = expectedCentroid.size();
            for (int dim = 0; dim < dimensions; dim++) {
                String label = "canopy centroid " + canopyIx + '[' + dim + ']';
                // Boxed on purpose: selects the exact-equality Object overload of assertEquals.
                Double want = Double.valueOf(expectedCentroid.get(dim));
                Double got = Double.valueOf(actualCentroid.get(dim));
                assertEquals(label, want, got);
            }
            canopyIx++;
        }
    }

    /**
     * Prints the string form of every canopy to standard output, one per line.
     *
     * @param list canopies to print
     */
    private static void printCanopies(List<Canopy> list) {
        for (Canopy canopy : list) {
            String description = canopy.toString();
            System.out.println(description);
        }
    }

    /**
     * Recursively deletes the file or directory at the given path.
     *
     * <p>Does nothing when the path does not exist. Deletion is best-effort:
     * the result of {@link File#delete()} is not checked.
     *
     * @param str path of the file or directory to remove
     */
    public static void rmr(String str) {
        File file = new File(str);
        if (!file.exists()) {
            return;
        }
        if (file.isDirectory()) {
            // File.list() returns null when the directory cannot be read;
            // iterating over it unguarded would throw a NullPointerException.
            String[] children = file.list();
            if (children != null) {
                for (String child : children) {
                    rmr(file.toString() + File.separator + child);
                }
            }
        }
        file.delete();
    }

    /**
     * Prepares the fixture: obtains the file system, removes the "output" and
     * "testdata" directories, and builds the Manhattan and Euclidean reference
     * canopies and their centroids from the sample points.
     */
    @Override // org.apache.mahout.common.MahoutTestCase
    public void setUp() throws Exception {
        super.setUp();
        Configuration conf = new Configuration();
        this.fs = FileSystem.get(conf);
        for (String staleDir : new String[] {"output", "testdata"}) {
            rmr(staleDir);
        }
        List<Canopy> manhattan = CanopyClusterer.createCanopies(getPoints(raw), this.manhattanDistanceMeasure, 3.1d, 2.1d);
        this.referenceManhattan = manhattan;
        this.manhattanCentroids = CanopyClusterer.calculateCentroids(manhattan);
        List<Canopy> euclidean = CanopyClusterer.createCanopies(getPoints(raw), this.euclideanDistanceMeasure, 3.1d, 2.1d);
        this.referenceEuclidean = euclidean;
        this.euclideanCentroids = CanopyClusterer.calculateCentroids(euclidean);
    }

    /**
     * Verifies the Manhattan reference canopies built in setUp(): three canopies
     * with the expected point counts and centroids.
     */
    public void testReferenceManhattan() throws Exception {
        System.out.println("testReferenceManhattan");
        printCanopies(this.referenceManhattan);
        // Expectation tables, indexed by canopy position.
        int[] expectedNumPoints = {4, 4, 3};
        double[][] expectedCentroids = {{1.5d, 1.5d}, {4.0d, 4.0d}, {4.666666666666667d, 4.666666666666667d}};
        assertEquals("number of canopies", 3, this.referenceManhattan.size());
        for (int i = 0; i < this.referenceManhattan.size(); i++) {
            Canopy canopy = this.referenceManhattan.get(i);
            assertEquals("canopy points " + i, expectedNumPoints[i], canopy.getNumPoints());
            double[] refCentroid = expectedCentroids[i];
            Vector testCentroid = canopy.computeCentroid();
            for (int j = 0; j < refCentroid.length; j++) {
                // Boxed on purpose: selects the exact-equality Object overload of assertEquals.
                assertEquals("canopy centroid " + i + '[' + j + ']', Double.valueOf(refCentroid[j]), Double.valueOf(testCentroid.get(j)));
            }
        }
    }

    /**
     * Verifies the Euclidean reference canopies built in setUp(): three canopies
     * with the expected point counts and centroids.
     */
    public void testReferenceEuclidean() throws Exception {
        System.out.println("testReferenceEuclidean()");
        printCanopies(this.referenceEuclidean);
        // Expectation tables, indexed by canopy position.
        int[] expectedNumPoints = {5, 5, 3};
        double[][] expectedCentroids = {{1.8d, 1.8d}, {4.2d, 4.2d}, {4.666666666666667d, 4.666666666666667d}};
        // Size check and loop bound use the Euclidean list; the previous code
        // consulted the Manhattan list here by mistake.
        assertEquals("number of canopies", 3, this.referenceEuclidean.size());
        for (int i = 0; i < this.referenceEuclidean.size(); i++) {
            Canopy canopy = this.referenceEuclidean.get(i);
            assertEquals("canopy points " + i, expectedNumPoints[i], canopy.getNumPoints());
            double[] refCentroid = expectedCentroids[i];
            Vector testCentroid = canopy.computeCentroid();
            for (int j = 0; j < refCentroid.length; j++) {
                // Boxed on purpose: selects the exact-equality Object overload of assertEquals.
                assertEquals("canopy centroid " + i + '[' + j + ']', Double.valueOf(refCentroid[j]), Double.valueOf(testCentroid.get(j)));
            }
        }
    }

    /**
     * Builds canopies with a fresh Manhattan measure and compares them with the
     * Manhattan reference canopies.
     */
    public void testIterativeManhattan() throws Exception {
        List<Vector> points = getPoints(raw);
        DistanceMeasure measure = new ManhattanDistanceMeasure();
        List<Canopy> canopies = CanopyClusterer.createCanopies(points, measure, 3.1d, 2.1d);
        System.out.println("testIterativeManhattan");
        printCanopies(canopies);
        verifyManhattanCanopies(canopies);
    }

    /**
     * Builds canopies with a fresh Euclidean measure and compares them with the
     * Euclidean reference canopies.
     */
    public void testIterativeEuclidean() throws Exception {
        List<Vector> points = getPoints(raw);
        DistanceMeasure measure = new EuclideanDistanceMeasure();
        List<Canopy> canopies = CanopyClusterer.createCanopies(points, measure, 3.1d, 2.1d);
        System.out.println("testIterativeEuclidean");
        printCanopies(canopies);
        verifyEuclideanCanopies(canopies);
    }

    /**
     * Feeds every sample point through a CanopyMapper configured with the
     * Manhattan measure and checks that the three centroids emitted under the
     * "centroid" key match the Manhattan reference centroids.
     */
    public void testCanopyMapperManhattan() throws Exception {
        JobConf conf = new JobConf();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.ManhattanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1d));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1d));
        CanopyMapper mapper = new CanopyMapper();
        mapper.configure(conf);
        DummyOutputCollector collector = new DummyOutputCollector();
        for (VectorWritable point : getPointsWritable(raw)) {
            mapper.map(new Text(), point, collector, new DummyReporter());
        }
        mapper.close();
        assertEquals("Number of map results", 1, collector.getData().size());
        List centroids = collector.getValue("centroid");
        assertEquals("Number of centroids", 3, centroids.size());
        int position = 0;
        for (Object emitted : centroids) {
            String expected = this.manhattanCentroids.get(position).asFormatString();
            String actual = ((VectorWritable) emitted).get().asFormatString();
            assertEquals("Centroid error", expected, actual);
            position++;
        }
    }

    /**
     * Feeds every sample point through a CanopyMapper configured with the
     * Euclidean measure and checks that the three centroids emitted under the
     * "centroid" key match the Euclidean reference centroids.
     */
    public void testCanopyMapperEuclidean() throws Exception {
        JobConf conf = new JobConf();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1d));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1d));
        CanopyMapper mapper = new CanopyMapper();
        mapper.configure(conf);
        DummyOutputCollector collector = new DummyOutputCollector();
        for (VectorWritable point : getPointsWritable(raw)) {
            mapper.map(new Text(), point, collector, new DummyReporter());
        }
        mapper.close();
        assertEquals("Number of map results", 1, collector.getData().size());
        List centroids = collector.getValue("centroid");
        assertEquals("Number of centroids", 3, centroids.size());
        int position = 0;
        for (Object emitted : centroids) {
            String expected = this.euclideanCentroids.get(position).asFormatString();
            String actual = ((VectorWritable) emitted).get().asFormatString();
            assertEquals("Centroid error", expected, actual);
            position++;
        }
    }

    /**
     * Reduces all sample points under the "centroid" key with a CanopyReducer
     * configured with the Manhattan measure, then compares the centroid of the
     * first canopy emitted under each output key, in key iteration order, with
     * the Manhattan reference centroids.
     */
    public void testCanopyReducerManhattan() throws Exception {
        JobConf conf = new JobConf();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.ManhattanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1d));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1d));
        CanopyReducer reducer = new CanopyReducer();
        reducer.configure(conf);
        DummyOutputCollector collector = new DummyOutputCollector();
        reducer.reduce(new Text("centroid"), getPointsWritable(raw).iterator(), collector, new DummyReporter());
        reducer.close();
        Set<String> keys = collector.getKeys();
        assertEquals("Number of centroids", 3, keys.size());
        int position = 0;
        for (String key : keys) {
            List emitted = collector.getValue(key);
            Vector expected = this.manhattanCentroids.get(position);
            Vector actual = ((Canopy) emitted.get(0)).computeCentroid();
            String message = expected.asFormatString() + " is not equal to " + actual.asFormatString();
            assertEquals(message, expected, actual);
            position++;
        }
    }

    /**
     * Reduces all sample points under the "centroid" key with a CanopyReducer
     * configured with the Euclidean measure, then compares the centroid of the
     * first canopy emitted under each output key, in key iteration order, with
     * the Euclidean reference centroids.
     */
    public void testCanopyReducerEuclidean() throws Exception {
        JobConf conf = new JobConf();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1d));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1d));
        CanopyReducer reducer = new CanopyReducer();
        reducer.configure(conf);
        DummyOutputCollector collector = new DummyOutputCollector();
        reducer.reduce(new Text("centroid"), getPointsWritable(raw).iterator(), collector, new DummyReporter());
        reducer.close();
        Set<String> keys = collector.getKeys();
        assertEquals("Number of centroids", 3, keys.size());
        int position = 0;
        for (String key : keys) {
            List emitted = collector.getValue(key);
            Vector expected = this.euclideanCentroids.get(position);
            Vector actual = ((Canopy) emitted.get(0)).computeCentroid();
            String message = expected.asFormatString() + " is not equal to " + actual.asFormatString();
            assertEquals(message, expected, actual);
            position++;
        }
    }

    /**
     * Writes the sample points to two input files, runs CanopyDriver with the
     * Manhattan measure, and checks that the output file holds exactly two
     * canopies, C0 and C1, with the expected centers.
     */
    public void testCanopyGenManhattanMR() throws Exception {
        List<VectorWritable> points = getPointsWritable(raw);
        File testData = new File("testdata");
        if (!testData.exists()) {
            testData.mkdir();
        }
        JobConf job = new JobConf(CanopyDriver.class);
        job.setMapOutputValueClass(points.get(0).getClass());
        ClusteringTestUtils.writePointsToFile(points, "testdata/file1", this.fs, job);
        ClusteringTestUtils.writePointsToFile(points, "testdata/file2", this.fs, job);
        CanopyDriver.runJob("testdata", "output/canopies", ManhattanDistanceMeasure.class.getName(), 3.1d, 2.1d);

        Path path = new Path("output/canopies/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(path.toUri(), job), path, job);
        // try/finally so the reader is released even when an assertion fails.
        try {
            Text key = new Text();
            Canopy canopy = new Canopy();
            assertTrue("more to come", reader.next(key, canopy));
            assertEquals("1st key", "C0", key.toString());
            assertEquals("1st x value", Double.valueOf(1.5d), Double.valueOf(canopy.getCenter().get(0)));
            assertEquals("1st y value", Double.valueOf(1.5d), Double.valueOf(canopy.getCenter().get(1)));
            assertTrue("more to come", reader.next(key, canopy));
            assertEquals("2nd key", "C1", key.toString());
            // Labels corrected: these check the second canopy, not the first.
            assertEquals("2nd x value", Double.valueOf(4.333333333333334d), Double.valueOf(canopy.getCenter().get(0)));
            assertEquals("2nd y value", Double.valueOf(4.333333333333334d), Double.valueOf(canopy.getCenter().get(1)));
            assertFalse("more to come", reader.next(key, canopy));
        } finally {
            reader.close();
        }
    }

    /**
     * Writes the sample points to two input files, runs CanopyDriver with the
     * Euclidean measure, and checks that the output file holds exactly two
     * canopies, C0 and C1, with the expected centers.
     */
    public void testCanopyGenEuclideanMR() throws Exception {
        List<VectorWritable> points = getPointsWritable(raw);
        File testData = new File("testdata");
        if (!testData.exists()) {
            testData.mkdir();
        }
        JobConf job = new JobConf(CanopyDriver.class);
        ClusteringTestUtils.writePointsToFile(points, "testdata/file1", this.fs, job);
        ClusteringTestUtils.writePointsToFile(points, "testdata/file2", this.fs, job);
        CanopyDriver.runJob("testdata", "output/canopies", EuclideanDistanceMeasure.class.getName(), 3.1d, 2.1d);

        Path path = new Path("output/canopies/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(path.toUri(), job), path, job);
        // try/finally so the reader is released even when an assertion fails.
        try {
            Text key = new Text();
            Canopy canopy = new Canopy();
            assertTrue("more to come", reader.next(key, canopy));
            assertEquals("1st key", "C0", key.toString());
            assertEquals("1st x value", Double.valueOf(1.8d), Double.valueOf(canopy.getCenter().get(0)));
            assertEquals("1st y value", Double.valueOf(1.8d), Double.valueOf(canopy.getCenter().get(1)));
            assertTrue("more to come", reader.next(key, canopy));
            assertEquals("2nd key", "C1", key.toString());
            // Labels corrected: these check the second canopy, not the first.
            assertEquals("2nd x value", Double.valueOf(4.433333333333334d), Double.valueOf(canopy.getCenter().get(0)));
            assertEquals("2nd y value", Double.valueOf(4.433333333333334d), Double.valueOf(canopy.getCenter().get(1)));
            assertFalse("more to come", reader.next(key, canopy));
        } finally {
            reader.close();
        }
    }

    /**
     * Seeds a ClusterMapper with one canopy per Manhattan reference centroid,
     * maps every sample point, and checks that each emitted point is covered by
     * the canopy whose identifier is the key it was emitted under.
     */
    public void testClusterMapperManhattan() throws Exception {
        ClusterMapper mapper = new ClusterMapper();
        JobConf conf = new JobConf();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.ManhattanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1d));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1d));
        mapper.configure(conf);

        // Typed collections: iterating the entry set of a raw Map does not compile.
        List<Canopy> canopies = new ArrayList<Canopy>();
        DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
        int nextCanopyId = 0;
        for (Vector centroid : this.manhattanCentroids) {
            canopies.add(new Canopy(centroid, nextCanopyId++));
        }
        mapper.config(canopies);
        for (VectorWritable point : getPointsWritable(raw)) {
            mapper.map(new Text(), point, collector, new DummyReporter());
        }

        Map<String, List<VectorWritable>> data = collector.getData();
        assertEquals("Number of map results", canopies.size(), data.size());
        for (Map.Entry<String, List<VectorWritable>> entry : data.entrySet()) {
            Canopy canopy = findCanopy(entry.getKey(), canopies);
            // findCanopy returns null for an unknown key; fail with a clear message instead.
            assertNotNull("No canopy for key " + entry.getKey(), canopy);
            for (VectorWritable point : entry.getValue()) {
                assertTrue("Point not in canopy", mapper.canopyCovers(canopy, point.get()));
            }
        }
    }

    /**
     * Looks up the first canopy whose identifier equals the given string.
     *
     * @param str  identifier to search for
     * @param list canopies to search
     * @return the first matching canopy, or {@code null} when none matches
     */
    private static Canopy findCanopy(String str, List<Canopy> list) {
        Canopy match = null;
        Iterator<Canopy> candidates = list.iterator();
        while (match == null && candidates.hasNext()) {
            Canopy candidate = candidates.next();
            if (candidate.getIdentifier().equals(str)) {
                match = candidate;
            }
        }
        return match;
    }

    /**
     * Seeds a ClusterMapper with one canopy per Euclidean reference centroid,
     * maps every sample point, and checks that each emitted point is covered by
     * the canopy whose identifier is the key it was emitted under.
     */
    public void testClusterMapperEuclidean() throws Exception {
        ClusterMapper mapper = new ClusterMapper();
        JobConf conf = new JobConf();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1d));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1d));
        mapper.configure(conf);

        // Typed collections: iterating the entry set of a raw Map does not compile.
        List<Canopy> canopies = new ArrayList<Canopy>();
        DummyOutputCollector<Text, VectorWritable> collector = new DummyOutputCollector<Text, VectorWritable>();
        int nextCanopyId = 0;
        for (Vector centroid : this.euclideanCentroids) {
            canopies.add(new Canopy(centroid, nextCanopyId++));
        }
        mapper.config(canopies);
        for (VectorWritable point : getPointsWritable(raw)) {
            mapper.map(new Text(), point, collector, new DummyReporter());
        }

        Map<String, List<VectorWritable>> data = collector.getData();
        assertEquals("Number of map results", canopies.size(), data.size());
        for (Map.Entry<String, List<VectorWritable>> entry : data.entrySet()) {
            Canopy canopy = findCanopy(entry.getKey(), canopies);
            // findCanopy returns null for an unknown key; fail with a clear message instead.
            assertNotNull("No canopy for key " + entry.getKey(), canopy);
            for (VectorWritable point : entry.getValue()) {
                assertTrue("Point not in canopy", mapper.canopyCovers(canopy, point.get()));
            }
        }
    }

    /**
     * Maps every sample point with a ClusterMapper seeded from the Manhattan
     * reference centroids, passes the mapper output through an IdentityReducer,
     * and checks that each reduced point is covered by the canopy whose
     * identifier is the key it was emitted under.
     */
    public void testClusterReducerManhattan() throws Exception {
        ClusterMapper mapper = new ClusterMapper();
        JobConf conf = new JobConf();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.ManhattanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1d));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1d));
        mapper.configure(conf);

        // Typed collections: iterating the entry set of a raw Map does not compile.
        List<Canopy> canopies = new ArrayList<Canopy>();
        DummyOutputCollector<Text, VectorWritable> mapCollector = new DummyOutputCollector<Text, VectorWritable>();
        int nextCanopyId = 0;
        for (Vector centroid : this.manhattanCentroids) {
            canopies.add(new Canopy(centroid, nextCanopyId++));
        }
        mapper.config(canopies);
        for (VectorWritable point : getPointsWritable(raw)) {
            mapper.map(new Text(), point, mapCollector, new DummyReporter());
        }
        Map<String, List<VectorWritable>> mapped = mapCollector.getData();
        assertEquals("Number of map results", canopies.size(), mapped.size());

        IdentityReducer<Text, VectorWritable> reducer = new IdentityReducer<Text, VectorWritable>();
        DummyOutputCollector<Text, VectorWritable> reduceCollector = new DummyOutputCollector<Text, VectorWritable>();
        for (Map.Entry<String, List<VectorWritable>> entry : mapped.entrySet()) {
            reducer.reduce(new Text(entry.getKey()), entry.getValue().iterator(), reduceCollector, null);
        }

        for (Map.Entry<String, List<VectorWritable>> entry : reduceCollector.getData().entrySet()) {
            Canopy canopy = findCanopy(entry.getKey(), canopies);
            // findCanopy returns null for an unknown key; fail with a clear message instead.
            assertNotNull("No canopy for key " + entry.getKey(), canopy);
            for (VectorWritable point : entry.getValue()) {
                assertTrue("Point not in canopy", mapper.canopyCovers(canopy, point.get()));
            }
        }
    }

    /**
     * Maps every sample point with a ClusterMapper seeded from the Euclidean
     * reference centroids, passes the mapper output through an IdentityReducer,
     * and checks that the reducer emits one key per canopy and that each reduced
     * point is covered by the canopy whose identifier is its key.
     */
    public void testClusterReducerEuclidean() throws Exception {
        ClusterMapper mapper = new ClusterMapper();
        JobConf conf = new JobConf();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1d));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1d));
        mapper.configure(conf);

        // Typed collections: iterating the entry set of a raw Map does not compile.
        List<Canopy> canopies = new ArrayList<Canopy>();
        DummyOutputCollector<Text, VectorWritable> mapCollector = new DummyOutputCollector<Text, VectorWritable>();
        int nextCanopyId = 0;
        for (Vector centroid : this.euclideanCentroids) {
            canopies.add(new Canopy(centroid, nextCanopyId++));
        }
        mapper.config(canopies);
        for (VectorWritable point : getPointsWritable(raw)) {
            mapper.map(new Text(), point, mapCollector, new DummyReporter());
        }
        Map<String, List<VectorWritable>> mapped = mapCollector.getData();

        IdentityReducer<Text, VectorWritable> reducer = new IdentityReducer<Text, VectorWritable>();
        DummyOutputCollector<Text, VectorWritable> reduceCollector = new DummyOutputCollector<Text, VectorWritable>();
        for (Map.Entry<String, List<VectorWritable>> entry : mapped.entrySet()) {
            reducer.reduce(new Text(entry.getKey()), entry.getValue().iterator(), reduceCollector, null);
        }

        Map<String, List<VectorWritable>> reduced = reduceCollector.getData();
        assertEquals("Number of map results", canopies.size(), reduced.size());
        for (Map.Entry<String, List<VectorWritable>> entry : reduced.entrySet()) {
            Canopy canopy = findCanopy(entry.getKey(), canopies);
            // findCanopy returns null for an unknown key; fail with a clear message instead.
            assertNotNull("No canopy for key " + entry.getKey(), canopy);
            for (VectorWritable point : entry.getValue()) {
                assertTrue("Point not in canopy", mapper.canopyCovers(canopy, point.get()));
            }
        }
    }

    /**
     * Writes the sample points to two input files, runs CanopyClusteringJob with
     * the Manhattan measure, prints every record of the clusters output file,
     * and checks the record count against 2 + 2 * (number of sample points).
     */
    public void testClusteringManhattanMR() throws Exception {
        List<VectorWritable> points = getPointsWritable(raw);
        File testData = new File("testdata");
        if (!testData.exists()) {
            testData.mkdir();
        }
        JobConf conf = new JobConf();
        conf.setMapOutputValueClass(points.get(0).getClass());
        ClusteringTestUtils.writePointsToFile(points, "testdata/file1", this.fs, conf);
        ClusteringTestUtils.writePointsToFile(points, "testdata/file2", this.fs, conf);
        CanopyClusteringJob.runJob("testdata", "output", ManhattanDistanceMeasure.class.getName(), 3.1d, 2.1d);

        SequenceFile.Reader reader = new SequenceFile.Reader(this.fs, new Path("output/clusters/part-00000"), conf);
        int count = 0;
        // try/finally so the reader is released even when reading throws.
        try {
            Text txt = new Text();
            VectorWritable vector = new VectorWritable();
            while (reader.next(txt, vector)) {
                count++;
                System.out.println("Txt: " + txt + " Vec: " + vector.get().asFormatString());
            }
        } finally {
            reader.close();
        }
        assertEquals("number of points", 2 + (2 * points.size()), count);
    }

    /**
     * Writes the sample points to two input files, runs CanopyClusteringJob with
     * the Euclidean measure, and checks the record count of the clusters output
     * file against 2 + 2 * (number of sample points).
     */
    public void testClusteringEuclideanMR() throws Exception {
        List<VectorWritable> points = getPointsWritable(raw);
        File testData = new File("testdata");
        if (!testData.exists()) {
            testData.mkdir();
        }
        Configuration conf = new Configuration();
        ClusteringTestUtils.writePointsToFile(points, "testdata/file1", this.fs, conf);
        ClusteringTestUtils.writePointsToFile(points, "testdata/file2", this.fs, conf);
        CanopyClusteringJob.runJob("testdata", "output", EuclideanDistanceMeasure.class.getName(), 3.1d, 2.1d);

        SequenceFile.Reader reader = new SequenceFile.Reader(this.fs, new Path("output/clusters/part-00000"), conf);
        int count = 0;
        // try/finally so the reader is released even when reading throws.
        try {
            // Reuse one key/value pair instead of allocating new ones per record.
            Text txt = new Text();
            VectorWritable vector = new VectorWritable();
            while (reader.next(txt, vector)) {
                count++;
            }
        } finally {
            reader.close();
        }
        assertEquals("number of points", 2 + (2 * points.size()), count);
    }

    /**
     * Writes the sample points to two input files, runs CanopyDriver with
     * UserDefinedDistanceMeasure, and checks that the output file holds exactly
     * two canopies, C0 and C1, with the expected centers.
     */
    public void testUserDefinedDistanceMeasure() throws Exception {
        List<VectorWritable> points = getPointsWritable(raw);
        File testData = new File("testdata");
        if (!testData.exists()) {
            testData.mkdir();
        }
        Configuration conf = new Configuration();
        ClusteringTestUtils.writePointsToFile(points, "testdata/file1", this.fs, conf);
        ClusteringTestUtils.writePointsToFile(points, "testdata/file2", this.fs, conf);
        CanopyDriver.runJob("testdata", "output/canopies", UserDefinedDistanceMeasure.class.getName(), 3.1d, 2.1d);

        JobConf job = new JobConf(CanopyDriver.class);
        Path path = new Path("output/canopies/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(path.toUri(), job), path, job);
        // try/finally so the reader is released even when an assertion fails.
        try {
            Text key = new Text();
            Canopy canopy = new Canopy();
            assertTrue("more to come", reader.next(key, canopy));
            assertEquals("1st key", "C0", key.toString());
            assertEquals("1st x value", Double.valueOf(1.5d), Double.valueOf(canopy.getCenter().get(0)));
            assertEquals("1st y value", Double.valueOf(1.5d), Double.valueOf(canopy.getCenter().get(1)));
            assertTrue("more to come", reader.next(key, canopy));
            assertEquals("2nd key", "C1", key.toString());
            // Labels corrected: these check the second canopy, not the first.
            assertEquals("2nd x value", Double.valueOf(4.333333333333334d), Double.valueOf(canopy.getCenter().get(0)));
            assertEquals("2nd y value", Double.valueOf(4.333333333333334d), Double.valueOf(canopy.getCenter().get(1)));
            assertFalse("more to come", reader.next(key, canopy));
        } finally {
            reader.close();
        }
    }
}
