package net.maizegenetics.pangenome.hapcollapse;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multimap;
import java.awt.Frame;
import java.io.PrintWriter;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javax.swing.ImageIcon;
import net.maizegenetics.analysis.data.SortTaxaAlphabeticallyPlugin;
import net.maizegenetics.analysis.distance.IBSDistanceMatrix;
import net.maizegenetics.analysis.distance.KinshipPlugin;
import net.maizegenetics.analysis.distance.RemoveNaNFromDistanceMatrixPlugin;
import net.maizegenetics.dna.map.GenomeSequence;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.dna.map.PositionList;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.Datum;
import net.maizegenetics.plugindef.Plugin;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.taxa.distance.DistanceMatrix;
import net.maizegenetics.taxa.tree.TreeClusters;
import net.maizegenetics.taxa.tree.UPGMATree;
import net.maizegenetics.util.ProgressListener;
import net.maizegenetics.util.Tuple;
import org.apache.log4j.Logger;

/* loaded from: input_file:net/maizegenetics/pangenome/hapcollapse/FindHaplotypeClustersPlugin.class */
public class FindHaplotypeClustersPlugin extends AbstractPlugin {
    // Class-wide log4j logger.
    private static final Logger myLogger = Logger.getLogger(FindHaplotypeClustersPlugin.class);
    // "i": input genotype directory. Only reachable through its accessors in this file.
    private PluginParameter<String> vcfDir;
    // "o": output directory/file basename. Only reachable through its accessors in this file.
    private PluginParameter<String> outFile;
    // "mxDiv": maximum divergence passed to both clustering methods by processData.
    private PluginParameter<Double> maxDistFromFounder;
    // "seqErr": sequencing error rate. Only reachable through its accessors in this file.
    private PluginParameter<Double> seqErrorRate;
    // "minSites": minimum site count passed to both clustering methods by processData.
    private PluginParameter<Integer> minSiteForComp;
    // "minTaxaCoverage": minimum taxa coverage. Only reachable through its accessors in this file.
    private PluginParameter<Double> minTaxaCoverage;
    // "minTaxa": clusters smaller than this are dropped by processData.
    private PluginParameter<Integer> minTaxaInGroup;
    // "ref": reference genome sequence. Only reachable through its accessors in this file.
    private PluginParameter<GenomeSequence> referenceSequence;
    // "seqOutDir": sequence output directory. Only reachable through its accessors in this file.
    private PluginParameter<String> sequenceOutDir;
    // "intervalFile": interval file. Only reachable through its accessors in this file.
    private PluginParameter<String> intervalFile;
    // "method": which clustering routine processData runs (coverage or upgma).
    private PluginParameter<CLUSTER_METHOD> clusterMethod;
    // "maxError": error threshold handed to the consensus caller.
    private PluginParameter<Double> maxError;
    // "replaceNsWithMajor": whether a missing consensus call is replaced by the site's major allele.
    private PluginParameter<Boolean> replaceNsWithMajor;
    // "useDepthForCalls": forwarded to the consensus caller by processData.
    private PluginParameter<Boolean> useDepthForCalls;

    /* loaded from: input_file:net/maizegenetics/pangenome/hapcollapse/FindHaplotypeClustersPlugin$CLUSTER_METHOD.class */
    public enum CLUSTER_METHOD {
        coverage,
        upgma
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:net/maizegenetics/pangenome/hapcollapse/FindHaplotypeClustersPlugin$CallAndAlleleDepth.class */
    /**
     * Holder pairing a genotype call with the per-allele counts it was derived from.
     * The counts array is stored and returned by reference; it is not copied.
     */
    public class CallAndAlleleDepth {
        private final byte call;
        private final int[] depths;

        /** Always throws: instances must be created with the two-argument constructor. */
        private CallAndAlleleDepth() {
            throw new IllegalStateException("Cannot initialize CallAndAlleleDepth with Default Constructor");
        }

        /**
         * @param b    the genotype call
         * @param iArr the allele counts backing the call
         */
        public CallAndAlleleDepth(byte b, int[] iArr) {
            depths = iArr;
            call = b;
        }

        /** @return the allele counts array supplied at construction */
        public int[] getDepths() {
            return depths;
        }

        /** @return the genotype call supplied at construction */
        public byte getCall() {
            return call;
        }
    }

    /**
     * Creates the plugin and builds every PluginParameter with its command-line name,
     * default value, GUI label and description.
     *
     * Defaults set here: mxDiv 0.01, seqErr 0.01, minSites 20, minTaxaCoverage 0.5, minTaxa 2,
     * method coverage, maxError 0.2, replaceNsWithMajor true, useDepthForCalls false.
     * The remaining parameters default to null.
     *
     * @param frame passed straight to the AbstractPlugin constructor
     * @param z     passed straight to the AbstractPlugin constructor
     */
    public FindHaplotypeClustersPlugin(Frame frame, boolean z) {
        super(frame, z);
        this.vcfDir = new PluginParameter.Builder("i", (Object) null, String.class).guiName("Target directory").inDir().required(false).description("Input genotypes to generate haplotypes from. Usually best to use all available samples from a species. Accepts all file types supported by TASSEL5.").build();
        this.outFile = new PluginParameter.Builder("o", (Object) null, String.class).guiName("Donor dir/file basename").outDir().required(false).description("Output file directory name, or new directory path; Directory will be created, if doesn't exist. Outfiles will be placed in the directory").build();
        this.maxDistFromFounder = new PluginParameter.Builder("mxDiv", Double.valueOf(0.01d), Double.class).guiName("Max divergence from founder").description("Maximum genetic divergence from founder haplotype to cluster sequences").build();
        this.seqErrorRate = new PluginParameter.Builder("seqErr", Double.valueOf(0.01d), Double.class).guiName("Sequencing error rate").description("Error rate used to merge alleles call hets versus homozygous").build();
        this.minSiteForComp = new PluginParameter.Builder("minSites", 20, Integer.class).guiName("Min sites to cluster").description("The minimum number of sites present in two taxa to compare genetic distance to evaluate similarity for clustering").build();
        this.minTaxaCoverage = new PluginParameter.Builder("minTaxaCoverage", Double.valueOf(0.5d), Double.class).guiName("Min taxa coverage").description("The minimum proportion of sites present in a taxa to go into clustering").build();
        this.minTaxaInGroup = new PluginParameter.Builder("minTaxa", 2, Integer.class).guiName("Min taxa to generate a haplotype").description("Minimum number of taxa to generate a haplotype").build();
        this.referenceSequence = new PluginParameter.Builder("ref", (Object) null, GenomeSequence.class).guiName("Reference Genome Sequence").required(false).description("Reference Genome Sequence to use to extract the fasta").build();
        this.sequenceOutDir = new PluginParameter.Builder("seqOutDir", (Object) null, String.class).guiName("Sequence Output Directory").outDir().required(false).description("Output Directory For storing the sequence files").build();
        this.intervalFile = new PluginParameter.Builder("intervalFile", (Object) null, String.class).guiName("Interval File").inFile().required(false).description("Interval File used to create the VCF file").build();
        this.clusterMethod = new PluginParameter.Builder("method", CLUSTER_METHOD.coverage, CLUSTER_METHOD.class).guiName("Cluster Method").description("The method used to cluster taxa. Coverage seeds the first cluster with the highest coverage taxon. UPGMA builds a UPGMA tree then cuts it at maxDistance.").required(false).build();
        this.maxError = new PluginParameter.Builder("maxError", Double.valueOf(0.2d), Double.class).guiName("Maximum error").description("Maximum error allowed to create a homozygous call.  If the error rate is above this value N or Major allele will be exported for that site").required(false).build();
        this.replaceNsWithMajor = new PluginParameter.Builder("replaceNsWithMajor", true, Boolean.class).guiName("Replace N calls with Major Allele").description("Boolean flag to replace any N calls with a Major Homozygous Diploid Value").required(false).build();
        this.useDepthForCalls = new PluginParameter.Builder("useDepthForCalls", false, Boolean.class).guiName("Use Depth For Calls").description("Boolean flag to have the clustering algorithm use depth information instead of allele counts").required(false).build();
    }

    /**
     * Clusters the taxa of every GenotypeTable in the input and emits one single-taxon
     * consensus GenotypeTable per sufficiently large cluster.
     *
     * For each input table: taxa are sorted alphabetically, clustered with the configured
     * method, clusters with fewer than {@code minTaxaInGroup} members are dropped, and the
     * remaining clusters are collapsed into consensus calls.
     *
     * @param dataSet input data; only Datum entries holding a GenotypeTable are used
     * @return a DataSet with one Datum per consensus table, named by {@code getDatumName}
     * @throws IllegalStateException if the cluster method is null or not supported
     */
    public DataSet processData(DataSet dataSet) {
        List<Datum> inputs = dataSet.getDataOfType(GenotypeTable.class);
        myLogger.info("Number of GenotypeTables: " + inputs.size());

        List<Datum> consensusData = inputs.stream()
                .map(datum -> (GenotypeTable) datum.getData())
                .map(FindHaplotypeClustersPlugin::sortTaxaAlphabetically)
                .flatMap(table -> consensusTablesFor(table).stream())
                .map(consensus -> new Datum(getDatumName(consensus), consensus, (String) null))
                .collect(Collectors.toList());
        return new DataSet(consensusData, this);
    }

    /** Runs SortTaxaAlphabeticallyPlugin on one table and returns the first GenotypeTable it produces. */
    private static GenotypeTable sortTaxaAlphabetically(GenotypeTable table) {
        DataSet wrapped = new DataSet(new Datum("inputAlignment", table, "Genotype Table"), (Plugin) null);
        DataSet sorted = new SortTaxaAlphabeticallyPlugin((Frame) null, false).performFunction(wrapped);
        return (GenotypeTable) sorted.getDataOfType(GenotypeTable.class).get(0).getData();
    }

    /** Clusters one table and builds a consensus table for every cluster that meets minTaxaInGroup. */
    private List<GenotypeTable> consensusTablesFor(GenotypeTable table) {
        myLogger.info("NumberOfSites:" + table.numberOfSites());
        Multimap<Taxon, Taxon> clusters = clusterTaxa(table);

        int minClusterSize = minTaxaInGroup();
        double errorThreshold = maxError();
        boolean useDepth = useDepthForCalls();
        boolean fillWithMajor = replaceNsWithMajor();

        List<GenotypeTable> consensusTables = clusters.asMap().values().stream()
                .filter(members -> members.size() >= minClusterSize)
                .map(members -> consensusGameteCalls(table, members, errorThreshold, useDepth, fillWithMajor))
                .collect(Collectors.toList());
        // This message used to name findHapClusterFromTree even when the coverage method ran.
        myLogger.info("processData: number of consensus GenotypeTables at end " + consensusTables.size());
        return consensusTables;
    }

    /** Dispatches to the clustering routine chosen by the "method" parameter. */
    private Multimap<Taxon, Taxon> clusterTaxa(GenotypeTable table) {
        CLUSTER_METHOD method = this.clusterMethod.value();
        if (method == null) {
            // Previously this fell through and failed later with a NullPointerException.
            throw new IllegalStateException("Cluster method must be one of " + Arrays.toString(CLUSTER_METHOD.values()));
        }
        int minSites = minSiteForComp();
        double maxDistance = maxDistFromFounder();
        Multimap<Taxon, Taxon> clusters;
        switch (method) {
            case coverage:
                clusters = findHapClusterHighCoverage(table, minSites, maxDistance);
                myLogger.info("findhHapClusterHighCoverage num clusters returned: " + clusters.keySet().size());
                return clusters;
            case upgma:
                clusters = findHapClustersFromTree(table, minSites, maxDistance);
                myLogger.info("findhHapClusterFromTree num clusters returned: " + clusters.keySet().size());
                return clusters;
            default:
                throw new IllegalStateException("Unsupported cluster method: " + method);
        }
    }

    /**
     * Builds the Datum name for a consensus table from its first position and first taxon:
     * {@code chr<chromosome>_stPos<position>_<taxonName>}.
     *
     * @param genotypeTable table with at least one position and one taxon
     * @return the generated name
     */
    private String getDatumName(GenotypeTable genotypeTable) {
        Position firstSite = (Position) genotypeTable.positions().get(0);
        String chromosomeName = firstSite.getChromosome().getName();
        int startPosition = firstSite.getPosition();
        return "chr" + chromosomeName + "_stPos" + startPosition + "_" + genotypeTable.taxaName(0);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Greedy clustering seeded by coverage.
     *
     * Taxa are visited in the order produced by {@code createPresentRanking}. Each taxon not yet
     * assigned becomes the founder of a new cluster, and every other unassigned taxon whose first
     * computeHetBitDistances value to the founder is a number below {@code d} joins that cluster.
     *
     * @param genotypeTable genotypes to cluster
     * @param i             minimum site count, used for the coverage ranking and the distance call
     * @param d             exclusive upper bound on distance to the founder
     * @return multimap from founder taxon to cluster members (the founder is a member of its own cluster)
     */
    public static Multimap<Taxon, Taxon> findHapClusterHighCoverage(GenotypeTable genotypeTable, int i, double d) {
        TaxaList allTaxa = genotypeTable.taxa();
        if (genotypeTable.positions().size() > 0) {
            myLogger.info("Alignment Locus:" + genotypeTable.chromosome(0) + " StartPos:" + genotypeTable.chromosomalPosition(0) + " Filtered taxa:" + genotypeTable.numberOfTaxa() + " Filtered sites:" + genotypeTable.numberOfSites());
        } else {
            myLogger.info("GenotypeTable does not have any positions.");
        }

        List<Tuple<Taxon, Integer>> ranking = createPresentRanking(genotypeTable, i);
        myLogger.info("\tTaxa with sufficient coverage:" + ranking.size());

        Multimap<Taxon, Taxon> clusters = HashMultimap.create();
        Set<Taxon> unassigned = ranking.stream().map(Tuple::getX).collect(Collectors.toSet());

        for (Tuple<Taxon, Integer> ranked : ranking) {
            Taxon founder = ranked.getX();
            // remove() reports whether the taxon was still unassigned; already clustered taxa are skipped.
            if (!unassigned.remove(founder)) {
                continue;
            }
            clusters.put(founder, founder);

            int founderIndex = allTaxa.indexOf(founder);
            List<Taxon> joined = unassigned.stream()
                    .filter(candidate -> {
                        double distance = IBSDistanceMatrix.computeHetBitDistances(genotypeTable, founderIndex, allTaxa.indexOf(candidate), i)[0];
                        return !Double.isNaN(distance) && distance < d;
                    })
                    .collect(Collectors.toList());
            clusters.putAll(founder, joined);
            unassigned.removeAll(joined);
        }

        myLogger.info("Found clusters:");
        return clusters;
    }

    /**
     * Clusters taxa by building a UPGMA tree from an IBS distance matrix and cutting it at {@code d}.
     *
     * Each cluster is keyed by the first tree leaf (in external-node order) that belongs to it.
     * The previous implementation looked the key up with the group id as a leaf index, so the key
     * could be a taxon from a different cluster, or the lookup could run past the last leaf.
     *
     * @param genotypeTable genotypes to cluster
     * @param i             minimum site count; NOTE(review): not used here, the distance matrix is
     *                      built with a hard-coded minimum of 5 sites - confirm whether that is intended
     * @param d             distance at which TreeClusters cuts the tree
     * @return multimap from cluster head taxon to the members of that cluster; empty when the
     *         NaN-filtered distance matrix has no taxa
     */
    protected static Multimap<Taxon, Taxon> findHapClustersFromTree(GenotypeTable genotypeTable, int i, double d) {
        Multimap<Taxon, Taxon> clusters = HashMultimap.create();

        // NOTE(review): the indel-free table only feeds the log line below; distances are still
        // computed from the unfiltered input, as before. Confirm whether that is intended.
        GenotypeTable withoutIndels = removeIndels(genotypeTable);
        if (genotypeTable.positions().size() == 0) {
            myLogger.info("GenotypeTable does not have any positions.");
        } else {
            myLogger.info("Alignment Locus:" + genotypeTable.chromosome(0) + " StartPos:" + genotypeTable.chromosomalPosition(0) + " Filtered taxa:" + withoutIndels.numberOfTaxa() + " Filtered sites:" + withoutIndels.numberOfSites());
        }

        DistanceMatrix distances = IBSDistanceMatrix.getInstance(genotypeTable, 5, (ProgressListener) null);
        DistanceMatrix finiteDistances = RemoveNaNFromDistanceMatrixPlugin.runPlugin(distances);
        myLogger.info("FindHaplotypeClusters:findHapClustersFromTree: dm.size " + distances.getSize() + ", dm2.size " + finiteDistances.getSize());

        // A tree needs at least two taxa; a lone taxon forms its own cluster.
        if (finiteDistances.getSize() == 1) {
            clusters.put(finiteDistances.getTaxon(0), finiteDistances.getTaxon(0));
        }
        if (finiteDistances.getSize() < 2) {
            return clusters;
        }

        UPGMATree tree = new UPGMATree(finiteDistances);
        // groups[leaf] is the group id assigned to external node number 'leaf'.
        int[] groups = new TreeClusters(tree).getGroups(d);

        HashMap<Integer, Taxon> headByGroup = new HashMap<>();
        for (int leaf = 0; leaf < groups.length; leaf++) {
            Taxon member = tree.getExternalNode(leaf).getIdentifier();
            Taxon head = headByGroup.get(groups[leaf]);
            if (head == null) {
                // First leaf seen for this group becomes the cluster head.
                head = member;
                headByGroup.put(groups[leaf], member);
            }
            clusters.put(head, member);
        }
        return clusters;
    }

    /**
     * Copies the table site by site, keeping only the sites that {@code isNonIndel} accepts.
     *
     * @param genotypeTable source genotypes
     * @return a new table over the same taxa containing the accepted sites
     */
    private static GenotypeTable removeIndels(GenotypeTable genotypeTable) {
        PositionList sitePositions = genotypeTable.positions();
        GenotypeTableBuilder filtered = GenotypeTableBuilder.getSiteIncremental(genotypeTable.taxa());
        int site = 0;
        while (site < sitePositions.size()) {
            byte[] siteGenotypes = genotypeTable.genotypeAllTaxa(site);
            boolean keep = isNonIndel(siteGenotypes);
            if (keep) {
                filtered.addSite((Position) sitePositions.get(site), siteGenotypes);
            }
            site++;
        }
        return filtered.build();
    }

    /**
     * Reports whether a site's genotypes are free of indel calls.
     *
     * Every element is inspected, including the first one. The previous loop started at index 1,
     * so an indel code in element 0 went unnoticed.
     *
     * @param bArr genotype bytes for one site across all taxa
     * @return false for an empty array or when any genotype equals 68 or 85, true otherwise
     */
    private static boolean isNonIndel(byte[] bArr) {
        // NOTE(review): presumably TASSEL's homozygous insertion (0x44) and gap (0x55) diploid
        // codes - confirm against NucleotideAlignmentConstants.
        final byte homozygousInsertion = 68;
        final byte homozygousGap = 85;

        if (bArr.length == 0) {
            return false;
        }
        for (byte genotype : bArr) {
            if (genotype == homozygousInsertion || genotype == homozygousGap) {
                return false;
            }
        }
        return true;
    }

    /**
     * Ranks taxa by their non-missing gamete count, highest first.
     *
     * @param genotypeTable genotypes to rank
     * @param i             threshold; only taxa whose non-missing gamete count is strictly greater are kept
     * @return list of (taxon, non-missing gamete count) tuples in descending count order
     */
    private static List<Tuple<Taxon, Integer>> createPresentRanking(GenotypeTable genotypeTable, int i) {
        List<Tuple<Taxon, Integer>> ranking = IntStream.range(0, genotypeTable.numberOfTaxa())
                .mapToObj(taxonIndex -> new Tuple<Taxon, Integer>(
                        genotypeTable.taxa().get(taxonIndex),
                        genotypeTable.totalGametesNonMissingForTaxon(taxonIndex)))
                .filter(entry -> entry.getY() > i)
                // Stable descending sort on the count, so ties keep their taxa order.
                .sorted((left, right) -> right.getY().compareTo(left.getY()))
                .collect(Collectors.toList());
        myLogger.debug("found present ranking");
        return ranking;
    }

    /**
     * Collapses a cluster of taxa into a single consensus taxon.
     *
     * A consensus call is computed for every site of {@code genotypeTable}. The result is a
     * one-taxon table over the same positions, and the taxon is named by
     * {@code createConsensusTaxonName}.
     *
     * The unused {@code int[6][numSites]} scratch array that was allocated for every cluster has
     * been removed.
     *
     * @param genotypeTable source genotypes
     * @param collection    taxa forming the cluster
     * @param d             maximum error rate, forwarded to the per-site caller
     * @param z             use-depth flag, forwarded to the per-site caller
     * @param z2            replace-missing-with-major flag, forwarded to the per-site caller
     * @return table holding the single consensus taxon
     */
    private GenotypeTable consensusGameteCalls(GenotypeTable genotypeTable, Collection<Taxon> collection, double d, boolean z, boolean z2) {
        PositionList positions = genotypeTable.positions();
        TaxaList taxa = genotypeTable.taxa();
        List<Integer> memberIndices = collection.stream()
                .map(member -> taxa.indexOf(member))
                .collect(Collectors.toList());

        int siteCount = positions.size();
        byte[] consensusCalls = new byte[siteCount];
        for (int site = 0; site < siteCount; site++) {
            consensusCalls[site] = consensusGameteCallAndCounts(genotypeTable, memberIndices, site, d, z, z2).getCall();
        }

        GenotypeTableBuilder builder = GenotypeTableBuilder.getTaxaIncremental(positions);
        builder.addTaxon(new Taxon(createConsensusTaxonName(collection)), consensusCalls);
        return builder.build();
    }

    /**
     * Computes the consensus call for one site across the taxa of a cluster.
     *
     * Both gametes of every listed taxon are tallied per allele, skipping allele code 15. The most
     * frequent allele (the lowest code wins ties) is called homozygous when its error rate,
     * {@code 1 - maxCount / totalCount}, is below {@code d}. Otherwise the call is -1, or the
     * site's homozygous major allele when {@code z2} is set.
     *
     * Fixes over the previous version:
     * - the error rate uses floating-point division; integer division made it either 0 or 1, so
     *   {@code d} had no graded effect;
     * - a site where every call is missing no longer throws ArithmeticException (division by
     *   zero); it is treated as having no consensus;
     * - allele codes outside the counts array are skipped instead of raising
     *   ArrayIndexOutOfBoundsException.
     *
     * @param genotypeTable source genotypes
     * @param list          indices of the cluster's taxa in {@code genotypeTable}
     * @param i             site index
     * @param d             maximum tolerated error rate (exclusive)
     * @param z             use-depth flag; NOTE(review): not consulted by this method
     * @param z2            when true, a missing consensus is replaced by the homozygous major allele
     * @return the call together with the six-slot per-allele counts
     */
    private CallAndAlleleDepth consensusGameteCallAndCounts(GenotypeTable genotypeTable, List<Integer> list, int i, double d, boolean z, boolean z2) {
        final byte unknownAllele = 15;
        final byte noCall = -1;

        int[] alleleCounts = new int[6];
        for (int taxonIndex : list) {
            byte[] gametes = genotypeTable.genotypeArray(taxonIndex, i);
            for (int gamete = 0; gamete < 2; gamete++) {
                byte allele = gametes[gamete];
                if (allele != unknownAllele && allele >= 0 && allele < alleleCounts.length) {
                    alleleCounts[allele]++;
                }
            }
        }

        byte bestAllele = unknownAllele;
        int bestCount = 0;
        int totalCount = 0;
        for (int allele = 0; allele < alleleCounts.length; allele++) {
            totalCount += alleleCounts[allele];
            if (alleleCounts[allele] > bestCount) {
                bestCount = alleleCounts[allele];
                bestAllele = (byte) allele;
            }
        }

        byte call = noCall;
        if (totalCount > 0) {
            double errorRate = 1.0d - ((double) bestCount / totalCount);
            if (errorRate < d) {
                call = GenotypeTableUtils.getUnphasedDiploidValue(bestAllele, bestAllele);
            }
        }
        if (call == noCall && z2) {
            byte majorAllele = genotypeTable.majorAllele(i);
            call = GenotypeTableUtils.getUnphasedDiploidValue(majorAllele, majorAllele);
        }
        return new CallAndAlleleDepth(call, alleleCounts);
    }

    /**
     * Names a consensus taxon after its members: each member name gets the suffix "_0", the
     * suffixed names are sorted, and the result is joined with ":".
     *
     * @param collection taxa forming the cluster
     * @return the joined name
     */
    private String createConsensusTaxonName(Collection<Taxon> collection) {
        return collection.stream()
                .map(Taxon::getName)
                .map(memberName -> memberName + "_0")
                .sorted()
                .collect(Collectors.joining(":"));
    }

    /**
     * Returns the user-manual URL for this plugin.
     * NOTE(review): the path ends in "Kinship/Missing", which looks like a placeholder rather
     * than a page about haplotype clustering - confirm.
     */
    public String pluginUserManualURL() {
        return "https://bitbucket.org/tasseladmin/tassel-5-source/wiki/UserManual/Kinship/Missing";
    }

    /**
     * Loads the plugin icon from the classpath resource "missing.gif".
     *
     * @return the icon, or null when the resource cannot be found
     */
    public ImageIcon getIcon() {
        URL iconLocation = KinshipPlugin.class.getResource("/net/maizegenetics/analysis/images/missing.gif");
        return iconLocation != null ? new ImageIcon(iconLocation) : null;
    }

    /** Returns the button label for this plugin. */
    public String getButtonName() {
        return "PHG Cluster Haplotypes";
    }

    /** Returns the tooltip text for this plugin; identical to the button label. */
    public String getToolTipText() {
        return "PHG Cluster Haplotypes";
    }

    /** @return the current value of the "i" (Target directory) parameter */
    public String vcfDir() {
        return vcfDir.value();
    }

    /**
     * Sets the "i" (Target directory) parameter.
     *
     * @param str new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin vcfDir(String str) {
        vcfDir = new PluginParameter<>(vcfDir, str);
        return this;
    }

    /** @return the current value of the "o" (Donor dir/file basename) parameter */
    public String outFile() {
        return outFile.value();
    }

    /**
     * Sets the "o" (Donor dir/file basename) parameter.
     *
     * @param str new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin outFile(String str) {
        outFile = new PluginParameter<>(outFile, str);
        return this;
    }

    /** @return the current value of the "mxDiv" (Max divergence from founder) parameter */
    public Double maxDistFromFounder() {
        return maxDistFromFounder.value();
    }

    /**
     * Sets the "mxDiv" (Max divergence from founder) parameter.
     *
     * @param d new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin maxDistFromFounder(Double d) {
        maxDistFromFounder = new PluginParameter<>(maxDistFromFounder, d);
        return this;
    }

    /** @return the current value of the "seqErr" (Sequencing error rate) parameter */
    public Double seqErrorRate() {
        return seqErrorRate.value();
    }

    /**
     * Sets the "seqErr" (Sequencing error rate) parameter.
     *
     * @param d new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin seqErrorRate(Double d) {
        seqErrorRate = new PluginParameter<>(seqErrorRate, d);
        return this;
    }

    /** @return the current value of the "minSites" (Min sites to cluster) parameter */
    public Integer minSiteForComp() {
        return minSiteForComp.value();
    }

    /**
     * Sets the "minSites" (Min sites to cluster) parameter.
     *
     * @param num new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin minSiteForComp(Integer num) {
        minSiteForComp = new PluginParameter<>(minSiteForComp, num);
        return this;
    }

    /** @return the current value of the "minTaxaCoverage" (Min taxa coverage) parameter */
    public Double minTaxaCoverage() {
        return minTaxaCoverage.value();
    }

    /**
     * Sets the "minTaxaCoverage" (Min taxa coverage) parameter.
     *
     * @param d new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin minTaxaCoverage(Double d) {
        minTaxaCoverage = new PluginParameter<>(minTaxaCoverage, d);
        return this;
    }

    /** @return the current value of the "minTaxa" (Min taxa to generate a haplotype) parameter */
    public Integer minTaxaInGroup() {
        return minTaxaInGroup.value();
    }

    /**
     * Sets the "minTaxa" (Min taxa to generate a haplotype) parameter.
     *
     * @param num new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin minTaxaInGroup(Integer num) {
        minTaxaInGroup = new PluginParameter<>(minTaxaInGroup, num);
        return this;
    }

    /** @return the current value of the "ref" (Reference Genome Sequence) parameter */
    public GenomeSequence referenceSequence() {
        return referenceSequence.value();
    }

    /**
     * Sets the "ref" (Reference Genome Sequence) parameter.
     *
     * @param genomeSequence new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin referenceSequence(GenomeSequence genomeSequence) {
        referenceSequence = new PluginParameter<>(referenceSequence, genomeSequence);
        return this;
    }

    /** @return the current value of the "seqOutDir" (Sequence Output Directory) parameter */
    public String sequenceOutDir() {
        return sequenceOutDir.value();
    }

    /**
     * Sets the "seqOutDir" (Sequence Output Directory) parameter.
     *
     * @param str new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin sequenceOutDir(String str) {
        sequenceOutDir = new PluginParameter<>(sequenceOutDir, str);
        return this;
    }

    /** @return the current value of the "intervalFile" (Interval File) parameter */
    public String intervalFile() {
        return intervalFile.value();
    }

    /**
     * Sets the "intervalFile" (Interval File) parameter.
     *
     * @param str new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin intervalFile(String str) {
        intervalFile = new PluginParameter<>(intervalFile, str);
        return this;
    }

    /** @return the current value of the "method" (Cluster Method) parameter */
    public CLUSTER_METHOD clusterMethod() {
        return clusterMethod.value();
    }

    /**
     * Sets the "method" (Cluster Method) parameter.
     *
     * @param cluster_method new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin clusterMethod(CLUSTER_METHOD cluster_method) {
        clusterMethod = new PluginParameter<>(clusterMethod, cluster_method);
        return this;
    }

    /** @return the current value of the "maxError" (Maximum error) parameter */
    public Double maxError() {
        return maxError.value();
    }

    /**
     * Sets the "maxError" (Maximum error) parameter.
     *
     * @param d new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin maxError(Double d) {
        maxError = new PluginParameter<>(maxError, d);
        return this;
    }

    /** @return the current value of the "replaceNsWithMajor" (Replace N calls with Major Allele) parameter */
    public Boolean replaceNsWithMajor() {
        return replaceNsWithMajor.value();
    }

    /**
     * Sets the "replaceNsWithMajor" (Replace N calls with Major Allele) parameter.
     *
     * @param bool new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin replaceNsWithMajor(Boolean bool) {
        replaceNsWithMajor = new PluginParameter<>(replaceNsWithMajor, bool);
        return this;
    }

    /** @return the current value of the "useDepthForCalls" (Use Depth For Calls) parameter */
    public Boolean useDepthForCalls() {
        return useDepthForCalls.value();
    }

    /**
     * Sets the "useDepthForCalls" (Use Depth For Calls) parameter.
     *
     * @param bool new value
     * @return this plugin, so calls can be chained
     */
    public FindHaplotypeClustersPlugin useDepthForCalls(Boolean bool) {
        useDepthForCalls = new PluginParameter<>(useDepthForCalls, bool);
        return this;
    }
}
