package org.apache.flink.ml.examples.feature;

import java.util.Arrays;
import java.util.List;
import org.apache.commons.collections.IteratorUtils;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.ml.feature.lsh.MinHashLSH;
import org.apache.flink.ml.feature.lsh.MinHashLSHModel;
import org.apache.flink.ml.linalg.DenseVector;
import org.apache.flink.ml.linalg.SparseVector;
import org.apache.flink.ml.linalg.Vector;
import org.apache.flink.ml.linalg.Vectors;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Expressions;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.expressions.Expression;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;

/* loaded from: input_file:org/apache/flink/ml/examples/feature/MinHashLSHExample.class */
/**
 * Example program that fits a {@link MinHashLSH} model on a small table of sparse vectors and
 * then uses the fitted {@link MinHashLSHModel} for three things:
 *
 * <ol>
 *   <li>transforming the training table and printing each vector with its hash values,
 *   <li>running an approximate nearest-neighbour query for a fixed key vector, and
 *   <li>running an approximate similarity join against a second table.
 * </ol>
 *
 * <p>All results are written to standard output.
 */
public class MinHashLSHExample {

    /** Dimension shared by every sparse vector built in this example. */
    private static final int VECTOR_SIZE = 6;

    /**
     * Runs the example end to end.
     *
     * @param args command-line arguments; not used
     * @throws Exception if building, executing, or collecting any of the Flink jobs fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // Both input tables share the same (id INT, vec SparseVector) row layout.
        TypeInformation<Row> rowType =
                Types.ROW_NAMED(
                        new String[] {"id", "vec"},
                        Types.INT,
                        TypeInformation.of(SparseVector.class));

        // Training / query table.
        Table inputTable =
                tEnv.fromDataStream(
                        env.fromCollection(
                                Arrays.asList(
                                        binaryVectorRow(0, 0, 1, 2),
                                        binaryVectorRow(1, 2, 3, 4),
                                        binaryVectorRow(2, 0, 2, 4)),
                                rowType));

        // Right-hand side of the similarity join.
        Table similarityJoinTable =
                tEnv.fromDataStream(
                        env.fromCollection(
                                Arrays.asList(
                                        binaryVectorRow(3, 1, 3, 5),
                                        binaryVectorRow(4, 2, 3, 5),
                                        binaryVectorRow(5, 1, 2, 4)),
                                rowType));

        MinHashLSH lsh =
                new MinHashLSH()
                        .setInputCol("vec")
                        .setOutputCol("hashes")
                        .setSeed(2022L)
                        .setNumHashTables(5);

        MinHashLSHModel model = lsh.fit(inputTable);

        // 1) Hash the training vectors and print them next to their hash values.
        Table hashed = model.transform(inputTable)[0];
        List<String> hashedColumns = hashed.getResolvedSchema().getColumnNames();
        int inputIdx = hashedColumns.indexOf(lsh.getInputCol());
        int outputIdx = hashedColumns.indexOf(lsh.getOutputCol());
        for (Row row : collectRows(tEnv, hashed)) {
            Vector inputVector = row.getFieldAs(inputIdx);
            DenseVector[] hashValues = row.getFieldAs(outputIdx);
            System.out.printf(
                    "Vector: %s \tHash values: %s\n", inputVector, Arrays.toString(hashValues));
        }

        // 2) Approximate nearest neighbours of a fixed key vector.
        Vector key = Vectors.sparse(VECTOR_SIZE, new int[] {1, 3}, new double[] {1.0, 1.0});
        Table neighbors =
                model.approxNearestNeighbors(inputTable, key, 2)
                        .select(Expressions.$("id"), Expressions.$("distCol"));
        // Resolve positions against the projected table itself rather than the schema of the
        // unrelated transform output.
        List<String> neighborColumns = neighbors.getResolvedSchema().getColumnNames();
        int neighborIdIdx = neighborColumns.indexOf("id");
        int neighborDistIdx = neighborColumns.indexOf("distCol");
        for (Row row : collectRows(tEnv, neighbors)) {
            Integer id = row.getFieldAs(neighborIdIdx);
            Double distance = row.getFieldAs(neighborDistIdx);
            System.out.printf("ID: %d \tDistance: %f\n", id, distance);
        }

        // 3) Approximate similarity join between the two tables with threshold 0.6 on "id".
        // The first three result fields are read as (left id, right id, distance).
        Table joined = model.approxSimilarityJoin(inputTable, similarityJoinTable, 0.6, "id");
        for (Row row : collectRows(tEnv, joined)) {
            Integer leftId = row.getFieldAs(0);
            Integer rightId = row.getFieldAs(1);
            Double distance = row.getFieldAs(2);
            System.out.printf(
                    "ID from left: %d \tID from right: %d \t Distance: %f\n",
                    leftId, rightId, distance);
        }
    }

    /**
     * Builds an {@code (id, vec)} row whose vector has size {@link #VECTOR_SIZE} and the value
     * {@code 1.0} at each of the given indices.
     *
     * @param id value of the {@code id} field
     * @param activeIndices indices of the non-zero entries of the sparse vector
     * @return the row holding the id and the sparse vector
     */
    private static Row binaryVectorRow(int id, int... activeIndices) {
        double[] ones = new double[activeIndices.length];
        Arrays.fill(ones, 1.0);
        return Row.of(id, Vectors.sparse(VECTOR_SIZE, activeIndices, ones));
    }

    /**
     * Executes the job backing {@code table} and returns all of its rows as a list.
     *
     * <p>The iterator returned by {@code executeAndCollect()} is closed before returning, so the
     * resources of the collecting job are released even if draining it fails.
     *
     * @param tEnv table environment used to convert the table into a data stream
     * @param table table whose rows should be collected
     * @return every row produced by the table
     * @throws Exception if executing the job or closing the iterator fails
     */
    @SuppressWarnings("unchecked") // IteratorUtils.toList returns a raw List of the iterator's rows.
    private static List<Row> collectRows(StreamTableEnvironment tEnv, Table table)
            throws Exception {
        try (CloseableIterator<Row> rows = tEnv.toDataStream(table).executeAndCollect()) {
            return (List<Row>) IteratorUtils.toList(rows);
        }
    }
}
