package org.apache.mahout.math.hadoop.decomposer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.cf.taste.hadoop.AbstractJob;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.MatrixSlice;
import org.apache.mahout.math.OrthonormalityVerifier;
import org.apache.mahout.math.SparseRowMatrix;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorIterable;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.decomposer.EigenStatus;
import org.apache.mahout.math.decomposer.SimpleEigenVerifier;
import org.apache.mahout.math.decomposer.SingularVectorVerifier;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/mahout-core-0.3.jar:org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.class */
/**
 * Command-line job that checks a set of purported eigenvectors against a corpus matrix,
 * discards the ones that fail the configured error / eigenvalue thresholds, and writes the
 * survivors to a {@link SequenceFile} named {@code largestCleanEigens} under the output path.
 *
 * <p>The eigenvectors are either loaded from the {@code --eigenInput} path or supplied
 * programmatically through {@link #setEigensToVerify(VectorIterable)} before {@link #run} is
 * invoked.
 */
public class EigenVerificationJob extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(EigenVerificationJob.class);

    private SingularVectorVerifier eigenVerifier;
    private OrthonormalityVerifier orthoVerifier;
    private VectorIterable eigensToVerify;
    private VectorIterable corpus;
    private double maxError;
    private double minEigenValue;
    private boolean loadEigensInMemory;
    private String tmpOut;
    private String outPath;

    /**
     * Supplies the eigenvectors to verify directly. When this is set to a non-null value before
     * {@link #run}, the {@code --eigenInput} path is not loaded.
     *
     * @param vectorIterable the candidate eigenvectors
     */
    public void setEigensToVerify(VectorIterable vectorIterable) {
        this.eigensToVerify = vectorIterable;
    }

    /**
     * Parses the arguments, configures the corpus and verifiers, then verifies, prunes and saves
     * the eigenvectors.
     *
     * @param strArr raw command-line arguments
     * @return {@code -1} if the arguments could not be parsed, {@code 0} otherwise (including
     *         the case where only help was requested)
     * @throws Exception if loading, verification or writing the output fails
     */
    public int run(String[] strArr) throws Exception {
        Map<String, String> parsedArgs = handleArgs(strArr);
        if (parsedArgs == null) {
            return -1;
        }
        if (parsedArgs.isEmpty()) {
            // Help was printed; nothing further to do.
            return 0;
        }
        this.outPath = parsedArgs.get("--output");
        this.tmpOut = this.outPath + "/tmp";

        String eigenInput = parsedArgs.get("--eigenInput");
        if (eigenInput != null && this.eigensToVerify == null) {
            // NOTE(review): the inMemory option is declared with a default of "false"; if the
            // inherited maybePut stores that default, this null check is always true and the
            // eigens are always buffered in memory. Confirm against AbstractJob before changing.
            prepareEigens(eigenInput, parsedArgs.get("--inMemory") != null);
        }

        this.maxError = Double.parseDouble(parsedArgs.get("--maxError"));
        this.minEigenValue = Double.parseDouble(parsedArgs.get("--minEigenvalue"));

        DistributedRowMatrix corpusMatrix =
            new DistributedRowMatrix(parsedArgs.get("--corpusInput"), this.tmpOut, 1, 1);
        corpusMatrix.configure(new JobConf(getConf()));
        this.corpus = corpusMatrix;

        this.eigenVerifier = new SimpleEigenVerifier();
        this.orthoVerifier = new OrthonormalityVerifier();

        // The pairwise inner products are computed but their result is not used here.
        computePairwiseInnerProducts();
        saveCleanEigens(pruneEigens(verifyEigens()));
        return 0;
    }

    /**
     * Builds the option group for this job and parses the given arguments.
     *
     * @param strArr raw command-line arguments
     * @return an empty map if help was requested, {@code null} if parsing failed (the error and
     *         the help text are emitted), otherwise the map filled in by {@code maybePut}
     */
    public Map<String, String> handleArgs(String[] strArr) {
        Option eigenInputOpt = buildOption("eigenInput", "ei", "The Path for purported eigenVector input files (SequenceFile<WritableComparable,VectorWritable>.", null);
        Option corpusInputOpt = buildOption("corpusInput", "ci", "The Path for corpus input files (SequenceFile<WritableComparable,VectorWritable>.");
        Option outputOpt = DefaultOptionCreator.outputOption().create();
        Option helpOpt = DefaultOptionCreator.helpOption();
        Option inMemoryOpt = buildOption("inMemory", "mem", "Buffer eigen matrix into memory (if you have enough!)", "false");
        Option maxErrorOpt = buildOption("maxError", "err", "Maximum acceptable error", "0.05");
        Option minEigenvalueOpt = buildOption("minEigenvalue", "mev", "Minimum eigenvalue to keep the vector for", "0.0");

        Group optionGroup = new GroupBuilder()
            .withName("Options")
            .withOption(eigenInputOpt)
            .withOption(corpusInputOpt)
            .withOption(helpOpt)
            .withOption(outputOpt)
            .withOption(inMemoryOpt)
            .withOption(maxErrorOpt)
            .withOption(minEigenvalueOpt)
            .create();

        Map<String, String> parsedArgs = new HashMap<String, String>();
        try {
            Parser parser = new Parser();
            parser.setGroup(optionGroup);
            CommandLine commandLine = parser.parse(strArr);
            if (commandLine.hasOption(helpOpt)) {
                CommandLineUtil.printHelp(optionGroup);
                return parsedArgs;
            }
            maybePut(parsedArgs, commandLine, eigenInputOpt, corpusInputOpt, helpOpt, outputOpt,
                inMemoryOpt, maxErrorOpt, minEigenvalueOpt);
            return parsedArgs;
        } catch (OptionException e) {
            log.error(e.getMessage());
            CommandLineUtil.printHelp(optionGroup);
            return null;
        }
    }

    /**
     * Delegates to the orthonormality verifier to compute the pairwise inner products of the
     * eigenvectors under verification.
     *
     * @return whatever {@code OrthonormalityVerifier.pairwiseInnerProducts} returns
     */
    public VectorIterable computePairwiseInnerProducts() {
        return this.orthoVerifier.pairwiseInnerProducts(this.eigensToVerify);
    }

    /**
     * Writes each entry as an {@code EigenVector}, keyed by its slice index, to the
     * {@code largestCleanEigens} sequence file under the output path.
     *
     * @param list the (slice, status) pairs to persist, written in list order
     * @throws IOException if the file cannot be created, appended to, or closed
     */
    public void saveCleanEigens(List<Map.Entry<MatrixSlice, EigenStatus>> list) throws IOException {
        Path path = new Path(this.outPath, "largestCleanEigens");
        Configuration conf = getConf();
        SequenceFile.Writer writer =
            new SequenceFile.Writer(FileSystem.get(conf), conf, path, IntWritable.class, VectorWritable.class);
        try {
            // The two writables are reused across iterations; each append serializes them.
            VectorWritable vectorWritable = new VectorWritable();
            IntWritable indexWritable = new IntWritable();
            for (Map.Entry<MatrixSlice, EigenStatus> entry : list) {
                MatrixSlice slice = entry.getKey();
                EigenStatus status = entry.getValue();
                EigenVector eigenVector = new EigenVector(
                    (DenseVector) slice.vector(),
                    status.getEigenValue(),
                    Math.abs(1.0d - status.getCosAngle()),
                    slice.index());
                log.info("appending {} to {}", eigenVector.getName(), path);
                vectorWritable.set(eigenVector);
                indexWritable.set(slice.index());
                writer.append(indexWritable, vectorWritable);
            }
        } finally {
            // Close even when an append fails so the underlying stream is not leaked.
            writer.close();
        }
    }

    /**
     * Keeps only the entries whose error {@code |1 - cosAngle|} is below {@code maxError} and
     * whose eigenvalue exceeds {@code minEigenValue}, sorted by ascending slice index.
     *
     * @param map verification results keyed by eigenvector slice
     * @return the surviving entries in ascending index order
     */
    public List<Map.Entry<MatrixSlice, EigenStatus>> pruneEigens(Map<MatrixSlice, EigenStatus> map) {
        List<Map.Entry<MatrixSlice, EigenStatus>> kept =
            new ArrayList<Map.Entry<MatrixSlice, EigenStatus>>();
        for (Map.Entry<MatrixSlice, EigenStatus> entry : map.entrySet()) {
            EigenStatus status = entry.getValue();
            boolean accurateEnough = Math.abs(1.0d - status.getCosAngle()) < this.maxError;
            boolean largeEnough = status.getEigenValue() > this.minEigenValue;
            if (accurateEnough && largeEnough) {
                kept.add(entry);
            }
        }
        Collections.sort(kept, new Comparator<Map.Entry<MatrixSlice, EigenStatus>>() {
            @Override
            public int compare(Map.Entry<MatrixSlice, EigenStatus> left,
                               Map.Entry<MatrixSlice, EigenStatus> right) {
                // Explicit comparison: subtracting the indices can overflow int.
                int leftIndex = left.getKey().index();
                int rightIndex = right.getKey().index();
                if (leftIndex < rightIndex) {
                    return -1;
                }
                return leftIndex == rightIndex ? 0 : 1;
            }
        });
        return kept;
    }

    /**
     * Runs the eigen verifier on every slice of the eigenvectors under verification against the
     * corpus.
     *
     * @return a map from each slice to its verification status
     */
    public Map<MatrixSlice, EigenStatus> verifyEigens() {
        Map<MatrixSlice, EigenStatus> statuses = new HashMap<MatrixSlice, EigenStatus>();
        for (MatrixSlice slice : this.eigensToVerify) {
            statuses.put(slice, this.eigenVerifier.verify(this.corpus, slice.vector()));
        }
        return statuses;
    }

    /**
     * Loads the eigenvectors from {@code str}, either as a distributed matrix or, when
     * {@code z} is true, copied into an in-memory {@code SparseRowMatrix}.
     *
     * @param str path of the eigenvector input
     * @param z   whether to buffer all rows in memory
     * @throws IllegalArgumentException if in-memory loading is requested and the input has no rows
     */
    private void prepareEigens(String str, boolean z) {
        DistributedRowMatrix eigens = new DistributedRowMatrix(str, this.tmpOut, 1, 1);
        eigens.configure(new JobConf(getConf()));
        if (!z) {
            this.eigensToVerify = eigens;
            return;
        }
        List<Vector> rows = new ArrayList<Vector>();
        for (MatrixSlice slice : eigens) {
            rows.add(slice.vector());
        }
        if (rows.isEmpty()) {
            // The column count is taken from the first row, so an empty input cannot be sized.
            throw new IllegalArgumentException("No eigenvectors found in " + str);
        }
        int[] cardinality = {rows.size(), rows.get(0).size()};
        this.eigensToVerify =
            new SparseRowMatrix(cardinality, rows.toArray(new Vector[rows.size()]), true, true);
    }

    /**
     * Command-line entry point; runs this job through {@link ToolRunner}.
     *
     * @param strArr raw command-line arguments
     * @throws Exception if the job fails
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new EigenVerificationJob(), strArr);
    }
}
