package picard.util;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.Intervals;

/**
 * Command-line program that splits a reference into runs of N bases and runs of non-N bases and
 * writes the requested kind(s) of run to an interval list.
 *
 * <p>Every sequence in the reference's sequence dictionary is scanned base by base
 * (case-insensitively). Each maximal run of N bases becomes an interval named {@link #Nmer};
 * each maximal run of other bases becomes an interval named {@link #ACGTmer}. An N run that is
 * at most {@link #MAX_TO_MERGE} bases long and is flanked on both sides by abutting non-N runs
 * is absorbed, together with both neighbours, into a single {@link #ACGTmer} interval.
 */
@CommandLineProgramProperties(usage = ScatterIntervalsByNs.USAGE_SUMMARY + ScatterIntervalsByNs.USAGE_DETAILS, usageShort = ScatterIntervalsByNs.USAGE_SUMMARY, programGroup = Intervals.class)
public class ScatterIntervalsByNs extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Writes an interval list based on splitting the reference by Ns.  ";
    static final String USAGE_DETAILS = "This tool identifies positions in the reference where the basecalls are Ns and writes out an interval list using the resulting coordinates (excluding the N bases). This can be used to create an interval list for whole genome sequence (WGS) for e.g. scatter-gather purposes, as an alternative to using fixed-length intervals. The number of contiguous Ns that can be tolerated before creating a break is adjustable from the command line.<br /><h4>Usage example:</h4><pre>java -jar picard.jar ScatterIntervalsByNs \\<br />      R=reference_sequence.fasta \\<br />      OT=BOTH \\<br />      O=output.interval_list</pre><hr />";

    @Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence to use.")
    public File REFERENCE;

    @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Output file for interval list.")
    public File OUTPUT;

    /** Name given to every interval that covers a run of non-N bases. */
    static final String ACGTmer = "ACGTmer";

    /** Name given to every interval that covers a run of N bases. */
    static final String Nmer = "Nmer";

    private static final Log log = Log.getInstance(ScatterIntervalsByNs.class);

    @Option(shortName = "OT", doc = "Type of intervals to output.", optional = true)
    public OutputType OUTPUT_TYPE = OutputType.BOTH;

    @Option(shortName = "N", doc = "Maximal number of contiguous N bases to tolerate, thereby continuing the current ACGT interval.", optional = true)
    public int MAX_TO_MERGE = 1;

    // Nothing in this class records into these loggers (segregateReference is static), so their
    // counts stay at zero; doWork() only uses locusProgress as an elapsed-time clock.
    final ProgressLogger locusProgress = new ProgressLogger(log, 10000000, "examined", "loci");
    final ProgressLogger intervalProgress = new ProgressLogger(log, 10, "found", "intervals");

    /** Selects which interval names ({@link #Nmer}, {@link #ACGTmer}, or both) are written out. */
    private enum OutputType {
        N(ScatterIntervalsByNs.Nmer),
        ACGT(ScatterIntervalsByNs.ACGTmer),
        BOTH(ScatterIntervalsByNs.Nmer, ScatterIntervalsByNs.ACGTmer);

        private final Set<String> acceptedTypes = new HashSet<String>();

        /**
         * @param str an interval name
         * @return whether intervals carrying that name belong to this output type
         */
        public Boolean accepts(String str) {
            return this.acceptedTypes.contains(str);
        }

        OutputType(String... strArr) {
            Collections.addAll(this.acceptedTypes, strArr);
        }
    }

    public static void main(String[] strArr) {
        new ScatterIntervalsByNs().instanceMainWithExit(strArr);
    }

    /**
     * Segregates {@link #REFERENCE}, keeps the intervals accepted by {@link #OUTPUT_TYPE} and
     * writes them to {@link #OUTPUT}.
     *
     * @return 0 on completion
     * @throws IllegalStateException if the reference is not indexed or has no sequence dictionary
     */
    @Override // picard.cmdline.CommandLineProgram
    protected int doWork() {
        IOUtil.assertFileIsReadable(this.REFERENCE);
        IOUtil.assertFileIsWritable(this.OUTPUT);

        final ReferenceSequenceFile referenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.REFERENCE, true);
        // segregateReference() fetches sequences by name and walks the dictionary; fail early
        // with a clear message instead of an opaque error deep inside the scan.
        if (!referenceFile.isIndexed()) {
            throw new IllegalStateException("Reference file must be indexed, but no index file was found");
        }
        if (referenceFile.getSequenceDictionary() == null) {
            throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found");
        }

        final IntervalList allIntervals = segregateReference(referenceFile, this.MAX_TO_MERGE);

        // Report the real numbers: the progress loggers are never fed, so their counts are 0.
        final long lociExamined = allIntervals.getHeader().getSequenceDictionary().getReferenceLength();
        log.info(String.format("Found %d intervals in %d loci during %s seconds", allIntervals.size(), lociExamined, this.locusProgress.getElapsedSeconds()));

        final IntervalList requestedIntervals = new IntervalList(allIntervals.getHeader().clone());
        log.info(String.format("Collecting requested type of intervals (%s)", this.OUTPUT_TYPE));
        for (final Interval interval : allIntervals.getIntervals()) {
            if (this.OUTPUT_TYPE.accepts(interval.getName())) {
                requestedIntervals.add(interval);
            }
        }

        log.info("Writing Intervals.");
        requestedIntervals.write(this.OUTPUT);
        log.info(String.format("Execution ending. Total time %d seconds", this.locusProgress.getElapsedSeconds()));
        return 0;
    }

    /**
     * Splits every sequence of the reference into alternating {@link #Nmer} and {@link #ACGTmer}
     * intervals, then merges each short N run with the non-N runs that abut it on both sides.
     *
     * <p>Merging repeats from the front of the list, so a merged interval can go on to absorb
     * the next short N run as well. N runs at the start or end of a sequence are never merged
     * because they lack an abutting non-N neighbour on one side. Zero-length sequences
     * contribute no intervals.
     *
     * @param referenceSequenceFile reference to scan; it must have a sequence dictionary and
     *                              support fetching a sequence by name
     * @param maxNmerToMerge        longest N run (in bases) that may be absorbed into the
     *                              surrounding non-N intervals
     * @return an interval list whose header carries the reference's sequence dictionary with
     *         coordinate sort order, containing the intervals in reference order
     */
    public static IntervalList segregateReference(final ReferenceSequenceFile referenceSequenceFile, final int maxNmerToMerge) {
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(referenceSequenceFile.getSequenceDictionary());
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList finalIntervals = new IntervalList(header);

        final LinkedList<Interval> preliminaryIntervals = new LinkedList<Interval>();
        for (final SAMSequenceRecord record : referenceSequenceFile.getSequenceDictionary().getSequences()) {
            final String sequenceName = record.getSequenceName();
            final byte[] bases = referenceSequenceFile.getSequence(sequenceName).getBases();
            appendRuns(sequenceName, bases, preliminaryIntervals);
        }

        // Consume the list from the front. A merged interval is pushed back so that it can take
        // part in the next trio; anything else at the front is final.
        while (!preliminaryIntervals.isEmpty()) {
            if (startsWithMergeableTrio(preliminaryIntervals, maxNmerToMerge)) {
                final Interval left = preliminaryIntervals.removeFirst();
                preliminaryIntervals.removeFirst(); // the short N run being absorbed
                final Interval right = preliminaryIntervals.removeFirst();
                preliminaryIntervals.addFirst(new Interval(left.getSequence(), left.getStart(), right.getEnd(), false, ACGTmer));
            } else {
                finalIntervals.add(preliminaryIntervals.removeFirst());
            }
        }
        return finalIntervals;
    }

    /** Appends one interval per maximal run of N / non-N bases in {@code bases} to {@code sink}. */
    private static void appendRuns(final String sequenceName, final byte[] bases, final LinkedList<Interval> sink) {
        if (bases.length == 0) {
            // No bases means no runs; this also keeps bases[0] below in bounds.
            return;
        }
        boolean inNRun = isN(bases[0]);
        int runStart = 0; // 0-based index of the first base of the current run
        for (int pos = 1; pos < bases.length; pos++) {
            if (isN(bases[pos]) != inNRun) {
                // Interval coordinates are 1-based and inclusive: the run covers [runStart + 1, pos].
                sink.add(new Interval(sequenceName, runStart + 1, pos, false, inNRun ? Nmer : ACGTmer));
                runStart = pos;
                inNRun = !inNRun;
            }
        }
        // Close the run that extends to the end of the sequence.
        sink.add(new Interval(sequenceName, runStart + 1, bases.length, false, inNRun ? Nmer : ACGTmer));
    }

    /** Returns whether {@code base} is an N in either letter case. */
    private static boolean isN(final byte base) {
        return base == 'N' || base == 'n';
    }

    /**
     * Returns whether the first three intervals are non-N, N, non-N, each abutting the next,
     * with the N run no longer than {@code maxNmerToMerge}.
     */
    private static boolean startsWithMergeableTrio(final LinkedList<Interval> intervals, final int maxNmerToMerge) {
        if (intervals.size() < 3) {
            return false;
        }
        final Interval left = intervals.get(0);
        final Interval middle = intervals.get(1);
        final Interval right = intervals.get(2);
        return ACGTmer.equals(left.getName())
                && Nmer.equals(middle.getName())
                && ACGTmer.equals(right.getName())
                && left.abuts(middle)
                && middle.abuts(right)
                && middle.length() <= maxNmerToMerge;
    }
}
