package picard.sam.markduplicates;

import htsjdk.samtools.DuplicateScoringStrategy;
import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.SortingLongCollection;
import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import picard.sam.DuplicationMetrics;
import picard.sam.markduplicates.util.AbstractMarkDuplicatesCommandLineProgram;
import picard.sam.markduplicates.util.DiskBasedReadEndsForMarkDuplicatesMap;
import picard.sam.markduplicates.util.LibraryIdGenerator;
import picard.sam.markduplicates.util.ReadEnds;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicates;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicatesCodec;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicatesWithBarcodes;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicatesWithBarcodesCodec;
import picard.sam.markduplicates.util.RepresentativeReadIndexerCodec;
import picard.sam.util.RepresentativeReadIndexer;

@CommandLineProgramProperties(summary = "Identifies duplicate reads.  <p>This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA.  Duplicates can arise during sample preparation e.g. library construction using PCR.  See also <a href='https://broadinstitute.github.io/picard/command-line-overview.html#EstimateLibraryComplexity'>EstimateLibraryComplexity</a> for additional notes on PCR duplication artifacts.  Duplicate reads can also result from a single amplification cluster, incorrectly detected as multiple clusters by the optical sensor of the sequencing instrument.  These duplication artifacts are referred to as optical duplicates.</p><p>The MarkDuplicates tool works by comparing sequences in the 5 prime positions of both reads and read-pairs in a SAM/BAM file.  An BARCODE_TAG option is available to facilitate duplicate marking using molecular barcodes.  After duplicate reads are collected, the tool differentiates the primary and duplicate reads using an algorithm that ranks reads by the sums of their base-quality scores (default method).</p>  <p>The tool's main output is a new SAM or BAM file, in which duplicates have been identified in the SAM flags field for each read.  Duplicates are marked with the hexadecimal value of 0x0400, which corresponds to a decimal value of 1024.  If you are not familiar with this type of annotation, please see the following <a href='https://www.broadinstitute.org/gatk/blog?id=7019'>blog post</a> for additional information.</p><p>Although the bitwise flag annotation indicates whether a read was marked as a duplicate, it does not identify the type of duplicate.  To do this, a new tag called the duplicate type (DT) tag was recently added as an optional output in  the 'optional field' section of a SAM/BAM file.  
Invoking the TAGGING_POLICY option, you can instruct the program to mark all the duplicates (All), only the optical duplicates (OpticalOnly), or no duplicates (DontTag).  The records within the output of a SAM/BAM file will have values for the 'DT' tag (depending on the invoked TAGGING_POLICY), as either library/PCR-generated duplicates (LB), or sequencing-platform artifact duplicates (SQ).  This tool uses the READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options as the primary methods to identify and differentiate duplicate types.  Set READ_NAME_REGEX to null to skip optical duplicate detection, e.g. for RNA-seq or other data where duplicate sets are extremely large and estimating library complexity is not an aim.  Note that without optical duplicate counts, library size estimation will be inaccurate.</p> <p>MarkDuplicates also produces a metrics file indicating the numbers of duplicates for both single- and paired-end reads.</p>  <p>The program can take either coordinate-sorted or query-sorted inputs, however the behavior is slightly different.  When the input is coordinate-sorted, unmapped mates of mapped records and supplementary/secondary alignments are not marked as duplicates.  However, when the input is query-sorted (actually query-grouped), then unmapped mates and secondary/supplementary reads are not excluded from the duplication test and can be marked as duplicate reads.</p>  <p>If desired, duplicates can be removed using the REMOVE_DUPLICATE and REMOVE_SEQUENCING_DUPLICATES options.</p><h4>Usage example:</h4><pre>java -jar picard.jar MarkDuplicates \\<br />      I=input.bam \\<br />      O=marked_duplicates.bam \\<br />      M=marked_dup_metrics.txt</pre>Please see <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#DuplicationMetrics'>MarkDuplicates</a> for detailed explanations of the output metrics.<hr />", oneLineSummary = MarkDuplicates.USAGE_SUMMARY, programGroup = ReadDataManipulationProgramGroup.class)
@DocumentedFeature
/* loaded from: input_file:picard/sam/markduplicates/MarkDuplicates.class */
public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
    static final String USAGE_SUMMARY = "Identifies duplicate reads.  ";
    static final String USAGE_DETAILS = "<p>This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA.  Duplicates can arise during sample preparation e.g. library construction using PCR.  See also <a href='https://broadinstitute.github.io/picard/command-line-overview.html#EstimateLibraryComplexity'>EstimateLibraryComplexity</a> for additional notes on PCR duplication artifacts.  Duplicate reads can also result from a single amplification cluster, incorrectly detected as multiple clusters by the optical sensor of the sequencing instrument.  These duplication artifacts are referred to as optical duplicates.</p><p>The MarkDuplicates tool works by comparing sequences in the 5 prime positions of both reads and read-pairs in a SAM/BAM file.  An BARCODE_TAG option is available to facilitate duplicate marking using molecular barcodes.  After duplicate reads are collected, the tool differentiates the primary and duplicate reads using an algorithm that ranks reads by the sums of their base-quality scores (default method).</p>  <p>The tool's main output is a new SAM or BAM file, in which duplicates have been identified in the SAM flags field for each read.  Duplicates are marked with the hexadecimal value of 0x0400, which corresponds to a decimal value of 1024.  If you are not familiar with this type of annotation, please see the following <a href='https://www.broadinstitute.org/gatk/blog?id=7019'>blog post</a> for additional information.</p><p>Although the bitwise flag annotation indicates whether a read was marked as a duplicate, it does not identify the type of duplicate.  To do this, a new tag called the duplicate type (DT) tag was recently added as an optional output in  the 'optional field' section of a SAM/BAM file.  Invoking the TAGGING_POLICY option, you can instruct the program to mark all the duplicates (All), only the optical duplicates (OpticalOnly), or no duplicates (DontTag). 
 The records within the output of a SAM/BAM file will have values for the 'DT' tag (depending on the invoked TAGGING_POLICY), as either library/PCR-generated duplicates (LB), or sequencing-platform artifact duplicates (SQ).  This tool uses the READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options as the primary methods to identify and differentiate duplicate types.  Set READ_NAME_REGEX to null to skip optical duplicate detection, e.g. for RNA-seq or other data where duplicate sets are extremely large and estimating library complexity is not an aim.  Note that without optical duplicate counts, library size estimation will be inaccurate.</p> <p>MarkDuplicates also produces a metrics file indicating the numbers of duplicates for both single- and paired-end reads.</p>  <p>The program can take either coordinate-sorted or query-sorted inputs, however the behavior is slightly different.  When the input is coordinate-sorted, unmapped mates of mapped records and supplementary/secondary alignments are not marked as duplicates.  However, when the input is query-sorted (actually query-grouped), then unmapped mates and secondary/supplementary reads are not excluded from the duplication test and can be marked as duplicate reads.</p>  <p>If desired, duplicates can be removed using the REMOVE_DUPLICATE and REMOVE_SEQUENCING_DUPLICATES options.</p><h4>Usage example:</h4><pre>java -jar picard.jar MarkDuplicates \\<br />      I=input.bam \\<br />      O=marked_duplicates.bam \\<br />      M=marked_dup_metrics.txt</pre>Please see <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#DuplicationMetrics'>MarkDuplicates</a> for detailed explanations of the output metrics.<hr />";
    /** SAM optional-attribute tag under which doWork() records (and, when CLEAR_DT is set, clears) a record's duplicate type. */
    public static final String DUPLICATE_TYPE_TAG = "DT";
    /** Duplicate-type code carried by {@link DuplicateType#LIBRARY}. */
    public static final String DUPLICATE_TYPE_LIBRARY = "LB";
    /** Duplicate-type code carried by {@link DuplicateType#SEQUENCING}. */
    public static final String DUPLICATE_TYPE_SEQUENCING = "SQ";
    /** SAM tag that doWork() fills with the representative read's index-in-file when TAG_DUPLICATE_SET_MEMBERS is set. */
    public static final String DUPLICATE_SET_INDEX_TAG = "DI";
    /** SAM tag that doWork() fills with the duplicate-set size when TAG_DUPLICATE_SET_MEMBERS is set. */
    public static final String DUPLICATE_SET_SIZE_TAG = "DS";
    // Read ends of matched pairs; filled and sealed (doneAdding) by buildSortedReadEndLists().
    private SortingCollection<ReadEndsForMarkDuplicates> pairSort;
    // Read ends of every mapped primary read; filled and sealed (doneAdding) by buildSortedReadEndLists().
    private SortingCollection<ReadEndsForMarkDuplicates> fragSort;
    // File indexes of records to flag as duplicates; consumed in order by doWork().
    private SortingLongCollection duplicateIndexes;
    // File indexes of optical (sequencing) duplicates; may be null, in which case doWork() treats it as empty.
    private SortingLongCollection opticalDuplicateIndexes;
    // Per-read duplicate-set information (read index, representative read index, set size); only iterated by
    // doWork() when TAG_DUPLICATE_SET_MEMBERS is set.
    private SortingCollection<RepresentativeReadIndexer> representativeReadIndicesForDuplicates;
    // Sentinel meaning "no further index available" for the index collections above.
    private static final long NO_SUCH_INDEX = Long.MAX_VALUE;
    private final Log log = Log.getInstance(MarkDuplicates.class);

    // Obsolete according to its own argument text; not read by any of the code visible in this part of the file.
    @Argument(shortName = "MAX_SEQS", doc = "This option is obsolete. ReadEnds will always be spilled to disk.")
    public int MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP = 50000;

    // Passed to the DiskBasedReadEndsForMarkDuplicatesMap created in buildSortedReadEndLists().
    @Argument(shortName = "MAX_FILE_HANDLES", doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.")
    public int MAX_FILE_HANDLES_FOR_READ_ENDS_MAP = 8000;

    // Fraction of the JVM's max memory used to size the pair/fragment sorting collections in buildSortedReadEndLists().
    @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections.  If you are running out of memory, try reducing this number.")
    public double SORTING_COLLECTION_SIZE_RATIO = 0.25d;

    // Barcode-aware processing is enabled in doWork() as soon as any one of the three barcode tags below is non-null.
    @Argument(doc = "Barcode SAM tag (ex. BC for 10X Genomics)", optional = true)
    public String BARCODE_TAG = null;

    @Argument(doc = "Read one barcode SAM tag (ex. BX for 10X Genomics)", optional = true)
    public String READ_ONE_BARCODE_TAG = null;

    @Argument(doc = "Read two barcode SAM tag (ex. BX for 10X Genomics)", optional = true)
    public String READ_TWO_BARCODE_TAG = null;

    // When true, doWork() writes the DI and DS tags on mapped primary members of duplicate sets.
    @Argument(doc = "If a read appears in a duplicate set, add two tags. The first tag, DUPLICATE_SET_SIZE_TAG (DS), indicates the size of the duplicate set. The smallest possible DS value is 2 which occurs when two reads map to the same portion of the reference only one of which is marked as duplicate. The second tag, DUPLICATE_SET_INDEX_TAG (DI), represents a unique identifier for the duplicate set to which the record belongs. This identifier is the index-in-file of the representative read that was selected out of the duplicate set.", optional = true)
    public boolean TAG_DUPLICATE_SET_MEMBERS = false;

    // When true, doWork() drops records identified as optical duplicates from the output.
    @Argument(doc = "If true remove 'optical' duplicates and other duplicates that appear to have arisen from the sequencing process instead of the library preparation process, even if REMOVE_DUPLICATES is false. If REMOVE_DUPLICATES is true, all duplicates are removed and this option is ignored.")
    public boolean REMOVE_SEQUENCING_DUPLICATES = false;

    // Controls which duplicates receive a DT attribute in doWork(); see DuplicateTaggingPolicy.
    @Argument(doc = "Determines how duplicate types are recorded in the DT optional attribute.")
    public DuplicateTaggingPolicy TAGGING_POLICY = DuplicateTaggingPolicy.DontTag;

    // When true, doWork() sets the DT attribute of every record to null before any tagging is applied.
    @Argument(doc = "Clear DT tag from input SAM records. Should be set to false if input SAM doesn't have this tag.  Default true")
    public boolean CLEAR_DT = true;
    // Number of duplicate indexes; only logged by doWork() here, so it is maintained by code outside this view.
    private int numDuplicateIndices = 0;
    // Created lazily by buildSortedReadEndLists() when still null; also holds the per-library metrics used in doWork().
    protected LibraryIdGenerator libraryIdGenerator = null;

    /**
     * Selects which duplicate records get a duplicate-type (DT) attribute when the output is written.
     */
    public enum DuplicateTaggingPolicy {
        /** No DT attribute is written. */
        DontTag,
        /** Only records identified as optical (sequencing) duplicates are tagged. */
        OpticalOnly,
        /** Optical duplicates are tagged as sequencing duplicates and every other duplicate as a library duplicate. */
        All
    }

    /**
     * The kinds of duplicate that can be recorded in the DT attribute, each paired with the
     * string code that is written to the record.
     */
    public enum DuplicateType {
        /** Duplicate attributed to library preparation. */
        LIBRARY(MarkDuplicates.DUPLICATE_TYPE_LIBRARY),
        /** Duplicate attributed to the sequencing process (optical duplicate). */
        SEQUENCING(MarkDuplicates.DUPLICATE_TYPE_SEQUENCING);

        private final String code;

        DuplicateType(String tagValue) {
            code = tagValue;
        }

        /**
         * @return the string written as the DT attribute value for this duplicate type
         */
        public String code() {
            return code;
        }
    }

    /**
     * Orders {@link ReadEndsForMarkDuplicates} by library id, then (only when barcodes are in use)
     * by barcode, read-one barcode and read-two barcode, then by read1 reference index, read1
     * coordinate, orientation, read2 reference index, read2 coordinate, and finally by the file
     * indexes of read1 and read2.
     *
     * <p>Every key is compared with {@code Integer.compare} / {@code Long.compare} rather than by
     * subtraction. Subtracting the long file indexes and narrowing to int can report 0 or the wrong
     * sign once the difference no longer fits in an int, which breaks the comparator contract.
     */
    public static class ReadEndsMDComparator implements Comparator<ReadEndsForMarkDuplicates> {
        final boolean useBarcodes;

        /**
         * @param useBarcodes whether the three barcode fields take part in the ordering; when true,
         *                    both arguments of {@link #compare} must be
         *                    {@link ReadEndsForMarkDuplicatesWithBarcodes} instances
         */
        public ReadEndsMDComparator(boolean useBarcodes) {
            this.useBarcodes = useBarcodes;
        }

        /**
         * @return a negative, zero or positive value as {@code lhs} sorts before, equal to or after {@code rhs}
         * @throws ClassCastException if barcodes are in use and either argument lacks barcode fields
         */
        @Override // java.util.Comparator
        public int compare(ReadEndsForMarkDuplicates lhs, ReadEndsForMarkDuplicates rhs) {
            int result = Integer.compare(lhs.libraryId, rhs.libraryId);
            if (this.useBarcodes) {
                // Cast unconditionally so a wrongly-typed element fails fast regardless of the library comparison.
                final ReadEndsForMarkDuplicatesWithBarcodes lhsWithBarcodes = (ReadEndsForMarkDuplicatesWithBarcodes) lhs;
                final ReadEndsForMarkDuplicatesWithBarcodes rhsWithBarcodes = (ReadEndsForMarkDuplicatesWithBarcodes) rhs;
                if (result == 0) {
                    result = Integer.compare(lhsWithBarcodes.barcode, rhsWithBarcodes.barcode);
                }
                if (result == 0) {
                    result = Integer.compare(lhsWithBarcodes.readOneBarcode, rhsWithBarcodes.readOneBarcode);
                }
                if (result == 0) {
                    result = Integer.compare(lhsWithBarcodes.readTwoBarcode, rhsWithBarcodes.readTwoBarcode);
                }
            }
            if (result == 0) {
                result = Integer.compare(lhs.read1ReferenceIndex, rhs.read1ReferenceIndex);
            }
            if (result == 0) {
                result = Integer.compare(lhs.read1Coordinate, rhs.read1Coordinate);
            }
            if (result == 0) {
                result = Integer.compare(lhs.orientation, rhs.orientation);
            }
            if (result == 0) {
                result = Integer.compare(lhs.read2ReferenceIndex, rhs.read2ReferenceIndex);
            }
            if (result == 0) {
                result = Integer.compare(lhs.read2Coordinate, rhs.read2Coordinate);
            }
            if (result == 0) {
                // File indexes are longs: compare them as longs instead of truncating the difference to int.
                result = Long.compare(lhs.read1IndexInFile, rhs.read1IndexInFile);
            }
            if (result == 0) {
                result = Long.compare(lhs.read2IndexInFile, rhs.read2IndexInFile);
            }
            return result;
        }
    }

    /**
     * Resolves the barcode value of a record for the tag configured in BARCODE_TAG by delegating
     * to {@code EstimateLibraryComplexity.getReadBarcodeValue}.
     */
    private int getBarcodeValue(SAMRecord rec) {
        final String tag = BARCODE_TAG;
        return EstimateLibraryComplexity.getReadBarcodeValue(rec, tag);
    }

    /**
     * Resolves the barcode value of a record for the tag configured in READ_ONE_BARCODE_TAG by
     * delegating to {@code EstimateLibraryComplexity.getReadBarcodeValue}.
     */
    private int getReadOneBarcodeValue(SAMRecord rec) {
        final String tag = READ_ONE_BARCODE_TAG;
        return EstimateLibraryComplexity.getReadBarcodeValue(rec, tag);
    }

    /**
     * Resolves the barcode value of a record for the tag configured in READ_TWO_BARCODE_TAG by
     * delegating to {@code EstimateLibraryComplexity.getReadBarcodeValue}.
     */
    private int getReadTwoBarcodeValue(SAMRecord rec) {
        final String tag = READ_TWO_BARCODE_TAG;
        return EstimateLibraryComplexity.getReadBarcodeValue(rec, tag);
    }

    /**
     * Creates the tool with SUM_OF_BASE_QUALITIES as the duplicate scoring strategy.
     */
    public MarkDuplicates() {
        DUPLICATE_SCORING_STRATEGY = DuplicateScoringStrategy.ScoringStrategy.SUM_OF_BASE_QUALITIES;
    }

    /**
     * Command-line entry point: hands the arguments to a fresh tool instance via instanceMainWithExit.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        final MarkDuplicates tool = new MarkDuplicates();
        tool.instanceMainWithExit(args);
    }

    /**
     * Runs the complete duplicate-marking workflow.
     *
     * <ol>
     *   <li>Validates the input, output and metrics files.</li>
     *   <li>Reads the input once to build the sorted read-end collections, then generates the
     *       duplicate (and optionally optical-duplicate) file indexes.</li>
     *   <li>Reads the input a second time, walking the index collections in step with the records to
     *       set the duplicate flag, update the per-library metrics, apply the DT / DI / DS attributes
     *       and write every record that is not filtered out by REMOVE_DUPLICATES or
     *       REMOVE_SEQUENCING_DUPLICATES.</li>
     *   <li>Releases the index collections, closes the writer and writes the metrics.</li>
     * </ol>
     *
     * For queryname-ordered input, a duplicate (or optical-duplicate) index is applied to every
     * following record that carries the same read name as the record at that index.
     *
     * @return 0 on completion
     * @throws PicardException if the input sort order is neither coordinate nor queryname
     */
    @Override // picard.cmdline.CommandLineProgram
    protected int doWork() {
        IOUtil.assertInputsAreValid(this.INPUT);
        IOUtil.assertFileIsWritable(this.OUTPUT);
        IOUtil.assertFileIsWritable(this.METRICS_FILE);

        // Barcode-aware read ends are used as soon as any barcode tag has been supplied.
        final boolean useBarcodes = this.BARCODE_TAG != null
                || this.READ_ONE_BARCODE_TAG != null
                || this.READ_TWO_BARCODE_TAG != null;

        reportMemoryStats("Start of doWork");
        this.log.info("Reading input file and constructing read end information.");
        buildSortedReadEndLists(useBarcodes);
        reportMemoryStats("After buildSortedReadEndLists");
        generateDuplicateIndexes(useBarcodes,
                this.REMOVE_SEQUENCING_DUPLICATES || this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag);
        reportMemoryStats("After generateDuplicateIndexes");
        this.log.info("Marking " + this.numDuplicateIndices + " records as duplicates.");

        if (this.READ_NAME_REGEX == null) {
            this.log.warn("Skipped optical duplicate cluster discovery; library size estimation may be inaccurate!");
        } else {
            this.log.info("Found " + this.libraryIdGenerator.getNumberOfOpticalDuplicateClusters() + " optical duplicate clusters.");
        }

        // Second pass over the input.
        final AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator headerAndIterator = openInputs(false);
        final SAMFileHeader header = headerAndIterator.header;
        final SAMFileHeader.SortOrder sortOrder = header.getSortOrder();
        final SAMFileHeader outputHeader = header.clone();

        this.log.info("Reads are assumed to be ordered by: " + sortOrder);
        if (sortOrder != SAMFileHeader.SortOrder.coordinate && sortOrder != SAMFileHeader.SortOrder.queryname) {
            throw new PicardException("This program requires input that are either coordinate or query sorted. Found " + sortOrder);
        }

        this.COMMENT.forEach(outputHeader::addComment);
        final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader);
        final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, true, this.OUTPUT);

        long recordInFileIndex = 0;
        long nextOpticalDuplicateIndex = (this.opticalDuplicateIndexes != null && this.opticalDuplicateIndexes.hasNext())
                ? this.opticalDuplicateIndexes.next()
                : NO_SUCH_INDEX;
        long nextDuplicateIndex = this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX;

        // Duplicate-set bookkeeping, only populated when TAG_DUPLICATE_SET_MEMBERS is set.
        CloseableIterator<RepresentativeReadIndexer> representativeReadIterator = null;
        int representativeReadIndexInFile = -1;
        int duplicateSetSize = -1;
        int duplicateSetIndex = -1;
        if (this.TAG_DUPLICATE_SET_MEMBERS) {
            representativeReadIterator = this.representativeReadIndicesForDuplicates.iterator();
            if (representativeReadIterator.hasNext()) {
                final RepresentativeReadIndexer rri = representativeReadIterator.next();
                duplicateSetIndex = rri.readIndexInFile;
                representativeReadIndexInFile = rri.representativeReadIndexInFile;
                duplicateSetSize = rri.setSize;
            }
        }

        final ProgressLogger progress = new ProgressLogger(this.log, 10000000, "Written");
        final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
        // Read names of the most recent duplicate / optical duplicate; used to extend a hit to the
        // remaining records of the same query name when the input is queryname ordered.
        String duplicateQueryName = null;
        String opticalDuplicateQueryName = null;

        while (iterator.hasNext()) {
            final SAMRecord rec = iterator.next();

            final String library = LibraryIdGenerator.getLibraryName(header, rec);
            DuplicationMetrics metrics = this.libraryIdGenerator.getMetricsByLibrary(library);
            if (metrics == null) {
                metrics = new DuplicationMetrics();
                metrics.LIBRARY = library;
                this.libraryIdGenerator.addMetricsByLibrary(library, metrics);
            }

            // Count the record in exactly one of the "examined" categories.
            if (rec.getReadUnmappedFlag()) {
                metrics.UNMAPPED_READS++;
            } else if (rec.isSecondaryOrSupplementary()) {
                metrics.SECONDARY_OR_SUPPLEMENTARY_RDS++;
            } else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
                metrics.UNPAIRED_READS_EXAMINED++;
            } else {
                metrics.READ_PAIRS_EXAMINED++;
            }

            // Advance to the next duplicate index once the current one has been passed (and, for
            // queryname order, once the read name has changed).
            if (recordInFileIndex > nextDuplicateIndex
                    && (sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(duplicateQueryName))) {
                nextDuplicateIndex = this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX;
            }

            final boolean isDuplicate = recordInFileIndex == nextDuplicateIndex
                    || (sortOrder == SAMFileHeader.SortOrder.queryname
                            && recordInFileIndex > nextDuplicateIndex
                            && rec.getReadName().equals(duplicateQueryName));
            if (isDuplicate) {
                duplicateQueryName = rec.getReadName();
                rec.setDuplicateReadFlag(true);
                // Only mapped primary alignments contribute to the duplicate counts.
                if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) {
                    if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
                        metrics.UNPAIRED_READ_DUPLICATES++;
                    } else {
                        metrics.READ_PAIR_DUPLICATES++;
                    }
                }
            } else {
                rec.setDuplicateReadFlag(false);
            }

            // Same walk for the optical duplicate indexes. When the collection is null the initial
            // value is NO_SUCH_INDEX, so this advance is never reached.
            if (recordInFileIndex > nextOpticalDuplicateIndex
                    && (sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(opticalDuplicateQueryName))) {
                nextOpticalDuplicateIndex = this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX;
            }
            final boolean isOpticalDuplicate = (sortOrder == SAMFileHeader.SortOrder.queryname
                            && recordInFileIndex > nextOpticalDuplicateIndex
                            && rec.getReadName().equals(opticalDuplicateQueryName))
                    || recordInFileIndex == nextOpticalDuplicateIndex;

            if (this.CLEAR_DT) {
                rec.setAttribute(DUPLICATE_TYPE_TAG, (Object) null);
            }
            if (this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag && rec.getDuplicateReadFlag()) {
                if (isOpticalDuplicate) {
                    opticalDuplicateQueryName = rec.getReadName();
                    rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.SEQUENCING.code());
                } else if (this.TAGGING_POLICY == DuplicateTaggingPolicy.All) {
                    rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.LIBRARY.code());
                }
            }

            if (this.TAG_DUPLICATE_SET_MEMBERS) {
                if (recordInFileIndex > (long) duplicateSetIndex && representativeReadIterator.hasNext()) {
                    final RepresentativeReadIndexer rri = representativeReadIterator.next();
                    duplicateSetIndex = rri.readIndexInFile;
                    representativeReadIndexInFile = rri.representativeReadIndexInFile;
                    duplicateSetSize = rri.setSize;
                }
                final boolean isInDuplicateSet = recordInFileIndex == (long) duplicateSetIndex
                        || (sortOrder == SAMFileHeader.SortOrder.queryname && recordInFileIndex > nextDuplicateIndex);
                if (isInDuplicateSet && !rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) {
                    rec.setAttribute(DUPLICATE_SET_INDEX_TAG, Integer.valueOf(representativeReadIndexInFile));
                    rec.setAttribute(DUPLICATE_SET_SIZE_TAG, Integer.valueOf(duplicateSetSize));
                }
            }

            recordInFileIndex++;

            final boolean droppedAsDuplicate = this.REMOVE_DUPLICATES && rec.getDuplicateReadFlag();
            final boolean droppedAsOptical = this.REMOVE_SEQUENCING_DUPLICATES && isOpticalDuplicate;
            if (!droppedAsDuplicate && !droppedAsOptical) {
                if (this.PROGRAM_RECORD_ID != null && this.pgTagArgumentCollection.ADD_PG_TAG_TO_READS.booleanValue()) {
                    rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name())));
                }
                out.addAlignment(rec);
                progress.record(rec);
            }
        }

        iterator.close();

        // Release every index collection, including the optical one, which was previously left
        // holding its memory and any spill files until JVM exit.
        this.duplicateIndexes.cleanup();
        if (this.opticalDuplicateIndexes != null) {
            this.opticalDuplicateIndexes.cleanup();
        }
        if (this.TAG_DUPLICATE_SET_MEMBERS) {
            this.representativeReadIndicesForDuplicates.cleanup();
        }

        reportMemoryStats("Before output close");
        out.close();
        reportMemoryStats("After output close");
        finalizeAndWriteMetrics(this.libraryIdGenerator);
        return 0;
    }

    /**
     * @return the sum of all values in the library id generator's optical-duplicates-by-library
     *         map, narrowed to a long
     */
    long numOpticalDuplicates() {
        return (long) libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap().getSumOfValues();
    }

    /**
     * Requests a garbage collection and then logs the JVM's free, total and max memory, prefixed
     * with the given stage label.
     *
     * @param stage text placed at the start of the log line
     */
    private void reportMemoryStats(String stage) {
        System.gc();
        final Runtime jvm = Runtime.getRuntime();
        final long free = jvm.freeMemory();
        final long total = jvm.totalMemory();
        final long max = jvm.maxMemory();
        final String message = stage + " freeMemory: " + free + "; totalMemory: " + total + "; maxMemory: " + max;
        log.info(message);
    }

    /**
     * First pass over the input: builds the fragment and pair read-end collections that duplicate
     * detection later consumes.
     *
     * <p>Every mapped, primary record contributes a fragment entry. A paired record whose mate is
     * mapped is additionally matched against its mate through a disk-backed map keyed by read group
     * and read name; once both ends have been seen, the combined entry is added to the pair
     * collection. For queryname-ordered input every record of a query-name group is given the file
     * index of the first record of that group. For coordinate-ordered input the scan stops at the
     * first unmapped record that has no reference index.
     *
     * <p>Both collection sizes derived from the JVM's max memory are clamped before being narrowed
     * to int, so very large heaps no longer produce a truncated (possibly negative) record limit or
     * a capacity that cannot be allocated as an array.
     *
     * @param useBarcodes whether barcode-aware read ends and codecs are used
     */
    private void buildSortedReadEndLists(boolean useBarcodes) {
        final int sizeInBytes = useBarcodes ? ReadEndsForMarkDuplicatesWithBarcodes.getSizeOf() : ReadEndsForMarkDuplicates.getSizeOf();
        final long maxHeapBytes = Runtime.getRuntime().maxMemory();

        // Clamp first, then narrow: a plain (int) cast of the long quotient silently drops high bits.
        this.MAX_RECORDS_IN_RAM = Integer.valueOf((int) Math.min(maxHeapBytes / sizeInBytes, (long) Integer.MAX_VALUE) / 2);
        // Stay a little below Integer.MAX_VALUE, which JVMs commonly refuse as an array length.
        final int maxInMemory = (int) Math.min((maxHeapBytes * this.SORTING_COLLECTION_SIZE_RATIO) / sizeInBytes, (double) (Integer.MAX_VALUE - 8));
        this.log.info("Will retain up to " + maxInMemory + " data points before spilling to disk.");

        // Each consumer gets its own codec instance.
        final ReadEndsForMarkDuplicatesCodec fragCodec;
        final ReadEndsForMarkDuplicatesCodec pairCodec;
        final ReadEndsForMarkDuplicatesCodec diskCodec;
        if (useBarcodes) {
            fragCodec = new ReadEndsForMarkDuplicatesWithBarcodesCodec();
            pairCodec = new ReadEndsForMarkDuplicatesWithBarcodesCodec();
            diskCodec = new ReadEndsForMarkDuplicatesWithBarcodesCodec();
        } else {
            fragCodec = new ReadEndsForMarkDuplicatesCodec();
            pairCodec = new ReadEndsForMarkDuplicatesCodec();
            diskCodec = new ReadEndsForMarkDuplicatesCodec();
        }

        this.pairSort = SortingCollection.newInstance(ReadEndsForMarkDuplicates.class, pairCodec, new ReadEndsMDComparator(useBarcodes), maxInMemory, this.TMP_DIR);
        this.fragSort = SortingCollection.newInstance(ReadEndsForMarkDuplicates.class, fragCodec, new ReadEndsMDComparator(useBarcodes), maxInMemory, this.TMP_DIR);

        final AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator headerAndIterator = openInputs(true);
        final SAMFileHeader.SortOrder sortOrder = headerAndIterator.header.getSortOrder();
        final SAMFileHeader header = headerAndIterator.header;
        final DiskBasedReadEndsForMarkDuplicatesMap pendingMates =
                new DiskBasedReadEndsForMarkDuplicatesMap(this.MAX_FILE_HANDLES_FOR_READ_ENDS_MAP, diskCodec);

        long index = 0;
        final ProgressLogger progress = new ProgressLogger(this.log, 1000000, "Read");
        final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
        if (this.libraryIdGenerator == null) {
            this.libraryIdGenerator = new LibraryIdGenerator(header);
        }

        // Queryname order only: name of the current query group and the file index of its first record.
        String currentQueryName = null;
        long firstIndexOfQueryGroup = Long.MAX_VALUE;

        while (iterator.hasNext()) {
            final SAMRecord rec = iterator.next();

            if (this.PROGRAM_RECORD_ID != null) {
                this.pgIdsSeen.add(rec.getStringAttribute(SAMTag.PG.name()));
            }

            if (sortOrder == SAMFileHeader.SortOrder.queryname && !rec.getReadName().equals(currentQueryName)) {
                currentQueryName = rec.getReadName();
                firstIndexOfQueryGroup = index;
            }

            if (rec.getReadUnmappedFlag()) {
                // An unmapped record without a reference index in coordinate order ends the scan.
                if (rec.getReferenceIndex().intValue() == -1 && sortOrder == SAMFileHeader.SortOrder.coordinate) {
                    break;
                }
            } else if (!rec.isSecondaryOrSupplementary()) {
                final long indexForRead = sortOrder == SAMFileHeader.SortOrder.queryname ? firstIndexOfQueryGroup : index;
                final ReadEndsForMarkDuplicates fragmentEnd = buildReadEnds(header, indexForRead, rec, useBarcodes);
                this.fragSort.add(fragmentEnd);

                if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                    final String key = rec.getAttribute(ReservedTagConstants.READ_GROUP_ID) + ":" + rec.getReadName();
                    final ReadEndsForMarkDuplicates pairedEnds = pendingMates.remove(rec.getReferenceIndex().intValue(), key);

                    if (pairedEnds == null) {
                        // First end of the pair: park a copy until its mate shows up.
                        final ReadEndsForMarkDuplicates pending = fragmentEnd.mo149clone();
                        pendingMates.put(pending.read2ReferenceIndex, key, pending);
                    } else {
                        final int sequence = fragmentEnd.read1ReferenceIndex;
                        final int coordinate = fragmentEnd.read1Coordinate;
                        final boolean recNegative = rec.getReadNegativeStrandFlag();
                        // The parked entry still carries the single-end orientation assigned by
                        // buildReadEnds, where 1 marks a negative-strand read.
                        final boolean mateNegative = pairedEnds.orientation == 1;

                        // The optical-duplicate orientation is always expressed as (first of pair, second of pair).
                        if (rec.getFirstOfPairFlag()) {
                            pairedEnds.orientationForOpticalDuplicates = ReadEnds.getOrientationByte(recNegative, mateNegative);
                            if (useBarcodes) {
                                ((ReadEndsForMarkDuplicatesWithBarcodes) pairedEnds).readOneBarcode = getReadOneBarcodeValue(rec);
                            }
                        } else {
                            pairedEnds.orientationForOpticalDuplicates = ReadEnds.getOrientationByte(mateNegative, recNegative);
                            if (useBarcodes) {
                                ((ReadEndsForMarkDuplicatesWithBarcodes) pairedEnds).readTwoBarcode = getReadTwoBarcodeValue(rec);
                            }
                        }

                        if (sequence > pairedEnds.read1ReferenceIndex
                                || (sequence == pairedEnds.read1ReferenceIndex && coordinate >= pairedEnds.read1Coordinate)) {
                            // The current record is at or after the parked end, so it becomes read2.
                            pairedEnds.read2ReferenceIndex = sequence;
                            pairedEnds.read2Coordinate = coordinate;
                            pairedEnds.read2IndexInFile = indexForRead;
                            pairedEnds.orientation = ReadEnds.getOrientationByte(mateNegative, recNegative);
                            // Both ends at the same position: orientation code 5 is normalised to code 3
                            // (presumably RF -> FR; the constants live in ReadEnds, which is not visible here).
                            if (pairedEnds.read2ReferenceIndex == pairedEnds.read1ReferenceIndex
                                    && pairedEnds.read2Coordinate == pairedEnds.read1Coordinate
                                    && pairedEnds.orientation == 5) {
                                pairedEnds.orientation = (byte) 3;
                            }
                        } else {
                            // The current record precedes the parked end: shift the parked end to read2.
                            pairedEnds.read2ReferenceIndex = pairedEnds.read1ReferenceIndex;
                            pairedEnds.read2Coordinate = pairedEnds.read1Coordinate;
                            pairedEnds.read2IndexInFile = pairedEnds.read1IndexInFile;
                            pairedEnds.read1ReferenceIndex = sequence;
                            pairedEnds.read1Coordinate = coordinate;
                            pairedEnds.read1IndexInFile = indexForRead;
                            pairedEnds.orientation = ReadEnds.getOrientationByte(recNegative, mateNegative);
                        }

                        pairedEnds.score = (short) (pairedEnds.score + DuplicateScoringStrategy.computeDuplicateScore(rec, this.DUPLICATE_SCORING_STRATEGY));
                        this.pairSort.add(pairedEnds);
                    }
                }
            }

            index++;
            if (progress.record(rec)) {
                this.log.info("Tracking " + pendingMates.size() + " as yet unmatched pairs. " + pendingMates.sizeInRam() + " records in RAM.");
            }
        }

        this.log.info("Read " + index + " records. " + pendingMates.size() + " pairs never matched.");
        iterator.close();
        this.pairSort.doneAdding();
        this.fragSort.doneAdding();
    }

    /**
     * Builds the read-ends summary for a single alignment record.
     *
     * <p>The "read 1" reference index, coordinate, file index, orientation, score and library id
     * are always populated. The mate reference index is copied only when the record is paired
     * and its mate is mapped. When the optical duplicate finder reports that it added location
     * information, {@code readGroup} is set to the position of the record's read-group id within
     * the header's read-group list (it equals the list size when no entry matches, and stays 0
     * when the record has no read-group attribute or the header list is null).
     *
     * @param sAMFileHeader header supplying the ordered list of read groups
     * @param j             value stored as {@code read1IndexInFile}
     * @param sAMRecord     alignment record to summarise
     * @param z             when true, a barcode-carrying instance is created and its barcode
     *                      fields are filled in
     * @return the newly built read-ends object
     */
    private ReadEndsForMarkDuplicates buildReadEnds(SAMFileHeader sAMFileHeader, long j, SAMRecord sAMRecord, boolean z) {
        final boolean reverseStrand = sAMRecord.getReadNegativeStrandFlag();

        final ReadEndsForMarkDuplicates ends;
        if (z) {
            ends = new ReadEndsForMarkDuplicatesWithBarcodes();
        } else {
            ends = new ReadEndsForMarkDuplicates();
        }

        ends.read1ReferenceIndex = sAMRecord.getReferenceIndex();
        if (reverseStrand) {
            // Negative-strand reads are anchored at their unclipped end; orientation byte is 1.
            ends.read1Coordinate = sAMRecord.getUnclippedEnd();
            ends.orientation = (byte) 1;
        } else {
            // Positive-strand reads are anchored at their unclipped start; orientation byte is 0.
            ends.read1Coordinate = sAMRecord.getUnclippedStart();
            ends.orientation = (byte) 0;
        }
        ends.read1IndexInFile = j;
        ends.score = DuplicateScoringStrategy.computeDuplicateScore(sAMRecord, this.DUPLICATE_SCORING_STRATEGY);

        // Only a paired record with a mapped mate gets a mate reference index.
        final boolean hasMappedMate = sAMRecord.getReadPairedFlag() && !sAMRecord.getMateUnmappedFlag();
        if (hasMappedMate) {
            ends.read2ReferenceIndex = sAMRecord.getMateReferenceIndex();
        }

        ends.libraryId = this.libraryIdGenerator.getLibraryId(sAMRecord);

        if (this.opticalDuplicateFinder.addLocationInformation(sAMRecord.getReadName(), ends)) {
            ends.readGroup = (short) 0;
            final String readGroupId = (String) sAMRecord.getAttribute(ReservedTagConstants.READ_GROUP_ID);
            final List<SAMReadGroupRecord> headerReadGroups = sAMFileHeader.getReadGroups();
            if (readGroupId != null && headerReadGroups != null) {
                // Count the header entries that precede the matching read group.
                for (final SAMReadGroupRecord candidate : headerReadGroups) {
                    if (candidate.getReadGroupId().equals(readGroupId)) {
                        break;
                    }
                    ends.readGroup++;
                }
            }
        }

        if (z) {
            final ReadEndsForMarkDuplicatesWithBarcodes barcodedEnds = (ReadEndsForMarkDuplicatesWithBarcodes) ends;
            barcodedEnds.barcode = getBarcodeValue(sAMRecord);
            // Unpaired records and first-of-pair records supply the read-one barcode;
            // everything else supplies the read-two barcode.
            final boolean actsAsReadOne = !sAMRecord.getReadPairedFlag() || sAMRecord.getFirstOfPairFlag();
            if (actsAsReadOne) {
                barcodedEnds.readOneBarcode = getReadOneBarcodeValue(sAMRecord);
            } else {
                barcodedEnds.readTwoBarcode = getReadTwoBarcodeValue(sAMRecord);
            }
        }
        return ends;
    }

    /**
     * Traverses the sorted pair and fragment read-ends collections, groups consecutive entries
     * that {@link #areComparableForDuplicates} considers equivalent, and records the file
     * indexes of the duplicates found in each group.
     *
     * <p>On return {@code pairSort} and {@code fragSort} have been cleaned up and set to null,
     * and {@code duplicateIndexes} (plus {@code opticalDuplicateIndexes} and
     * {@code representativeReadIndicesForDuplicates} when they are in use) have been switched
     * from the adding phase to the iteration phase.
     *
     * @param z  forwarded to {@link #areComparableForDuplicates} as its barcode flag
     * @param z2 when true, a separate collection of optical-duplicate indexes is created
     */
    private void generateDuplicateIndexes(boolean z, boolean z2) {
        // Per-entry size used to budget the in-memory buffers: 16 when duplicate-set members
        // are tagged, 8 otherwise.
        final int entrySize = this.TAG_DUPLICATE_SET_MEMBERS ? 16 : 8;
        // Use a quarter of the maximum heap, capped just below Integer.MAX_VALUE.
        int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25d) / entrySize, 2.147483642E9d);
        if (z2) {
            // Shrink the budget by entrySize / (entrySize + 8) to make room for the additional
            // collection of longs. This used to be "maxInMemory /= (entrySize + 8) / entrySize",
            // whose integer divisor truncates to 1 when entrySize is 16, so no reduction was
            // applied in that case. The result for entrySize == 8 is unchanged (halved).
            maxInMemory = (int) ((long) maxInMemory * entrySize / (entrySize + 8));
            this.opticalDuplicateIndexes = new SortingLongCollection(maxInMemory, (File[]) this.TMP_DIR.toArray(new File[this.TMP_DIR.size()]));
        }
        this.log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
        this.duplicateIndexes = new SortingLongCollection(maxInMemory, (File[]) this.TMP_DIR.toArray(new File[this.TMP_DIR.size()]));
        if (this.TAG_DUPLICATE_SET_MEMBERS) {
            this.representativeReadIndicesForDuplicates = SortingCollection.newInstance(
                    RepresentativeReadIndexer.class,
                    new RepresentativeReadIndexerCodec(),
                    Comparator.comparing(indexer -> indexer.readIndexInFile),
                    maxInMemory,
                    this.TMP_DIR);
        }

        final List<ReadEndsForMarkDuplicates> chunk = new ArrayList<>(200);

        // Pass 1: read pairs.
        this.log.info("Traversing read pair information and detecting duplicates.");
        ReadEndsForMarkDuplicates firstOfChunk = null;
        // try-with-resources guarantees the iterator is released even if marking throws.
        try (CloseableIterator<ReadEndsForMarkDuplicates> pairs = this.pairSort.iterator()) {
            while (pairs.hasNext()) {
                final ReadEndsForMarkDuplicates next = pairs.next();
                if (firstOfChunk != null && areComparableForDuplicates(firstOfChunk, next, true, z)) {
                    chunk.add(next);
                } else {
                    // A new chunk starts here: flush the finished one first.
                    markPairChunk(chunk);
                    chunk.clear();
                    chunk.add(next);
                    firstOfChunk = next;
                }
            }
        }
        markPairChunk(chunk);
        this.pairSort.cleanup();
        this.pairSort = null;

        // Pass 2: fragments. Drop whatever the pair pass left behind so that the trailing
        // markDuplicateFragments call can never see pair entries if fragSort yields nothing.
        chunk.clear();
        this.log.info("Traversing fragment information and detecting duplicates.");
        boolean containsPairs = false;
        boolean containsFrags = false;
        firstOfChunk = null;
        try (CloseableIterator<ReadEndsForMarkDuplicates> frags = this.fragSort.iterator()) {
            while (frags.hasNext()) {
                final ReadEndsForMarkDuplicates next = frags.next();
                if (firstOfChunk != null && areComparableForDuplicates(firstOfChunk, next, false, z)) {
                    chunk.add(next);
                    containsPairs = containsPairs || next.isPaired();
                    containsFrags = containsFrags || !next.isPaired();
                } else {
                    // Only chunks holding at least one unpaired entry can yield fragment duplicates.
                    if (chunk.size() > 1 && containsFrags) {
                        markDuplicateFragments(chunk, containsPairs);
                    }
                    chunk.clear();
                    chunk.add(next);
                    firstOfChunk = next;
                    containsPairs = next.isPaired();
                    containsFrags = !next.isPaired();
                }
            }
        }
        markDuplicateFragments(chunk, containsPairs);
        this.fragSort.cleanup();
        this.fragSort = null;

        this.log.info("Sorting list of duplicate records.");
        this.duplicateIndexes.doneAddingStartIteration();
        if (this.opticalDuplicateIndexes != null) {
            this.opticalDuplicateIndexes.doneAddingStartIteration();
        }
        if (this.TAG_DUPLICATE_SET_MEMBERS) {
            this.representativeReadIndicesForDuplicates.doneAdding();
        }
    }

    /**
     * Marks duplicates within one chunk of comparable pairs and, when duplicate-set tagging is
     * enabled, records the representative read for every member. Chunks with fewer than two
     * entries contain no duplicates and are ignored.
     */
    private void markPairChunk(List<ReadEndsForMarkDuplicates> chunk) {
        if (chunk.size() > 1) {
            markDuplicatePairs(chunk);
            if (this.TAG_DUPLICATE_SET_MEMBERS) {
                addRepresentativeReadIndex(chunk);
            }
        }
    }

    /**
     * Decides whether two read-ends entries belong to the same duplicate group.
     *
     * <p>Entries match when they share the library id, (optionally) all three barcode values,
     * the read-1 reference index, read-1 coordinate and orientation, and (optionally) the
     * read-2 reference index and coordinate.
     *
     * @param readEndsForMarkDuplicates  first entry
     * @param readEndsForMarkDuplicates2 second entry
     * @param z                          when true the read-2 reference index and coordinate
     *                                   must match as well
     * @param z2                         when true both entries are cast to the barcode-carrying
     *                                   subtype and their barcode fields must match
     * @return true if every requested comparison succeeds
     */
    private boolean areComparableForDuplicates(ReadEndsForMarkDuplicates readEndsForMarkDuplicates, ReadEndsForMarkDuplicates readEndsForMarkDuplicates2, boolean z, boolean z2) {
        if (readEndsForMarkDuplicates.libraryId != readEndsForMarkDuplicates2.libraryId) {
            return false;
        }

        if (z2) {
            final ReadEndsForMarkDuplicatesWithBarcodes lhs = (ReadEndsForMarkDuplicatesWithBarcodes) readEndsForMarkDuplicates;
            final ReadEndsForMarkDuplicatesWithBarcodes rhs = (ReadEndsForMarkDuplicatesWithBarcodes) readEndsForMarkDuplicates2;
            if (lhs.barcode != rhs.barcode
                    || lhs.readOneBarcode != rhs.readOneBarcode
                    || lhs.readTwoBarcode != rhs.readTwoBarcode) {
                return false;
            }
        }

        if (readEndsForMarkDuplicates.read1ReferenceIndex != readEndsForMarkDuplicates2.read1ReferenceIndex
                || readEndsForMarkDuplicates.read1Coordinate != readEndsForMarkDuplicates2.read1Coordinate
                || readEndsForMarkDuplicates.orientation != readEndsForMarkDuplicates2.orientation) {
            return false;
        }

        if (!z) {
            return true;
        }
        return readEndsForMarkDuplicates.read2ReferenceIndex == readEndsForMarkDuplicates2.read2ReferenceIndex
                && readEndsForMarkDuplicates.read2Coordinate == readEndsForMarkDuplicates2.read2Coordinate;
    }

    /**
     * Records one file index in {@code duplicateIndexes} and bumps the running count of
     * indexes added.
     *
     * @param j file index to record as a duplicate
     */
    private void addIndexAsDuplicate(long j) {
        this.duplicateIndexes.add(j);
        this.numDuplicateIndices += 1;
    }

    /**
     * Adds one entry to {@code representativeReadIndicesForDuplicates} linking a read to the
     * representative read of its duplicate set.
     *
     * @param j  file index of the representative read
     * @param i  number of entries in the duplicate set
     * @param j2 file index of the read being described
     */
    private void addRepresentativeReadOfDuplicateSet(long j, int i, long j2) {
        final RepresentativeReadIndexer entry = new RepresentativeReadIndexer();
        // NOTE(review): both indexes are narrowed from long to int, so values above
        // Integer.MAX_VALUE would be silently truncated -- confirm this cannot occur in practice.
        entry.readIndexInFile = (int) j2;
        entry.setSize = i;
        entry.representativeReadIndexInFile = (int) j;
        this.representativeReadIndicesForDuplicates.add(entry);
    }

    /**
     * Picks the highest-scoring entry of a duplicate set (the earliest one wins ties) and
     * registers it as the representative for the read-1 and read-2 file indexes of every entry
     * in the set, including the representative itself.
     *
     * @param list entries forming one duplicate set; an empty list is a no-op
     */
    private void addRepresentativeReadIndex(List<ReadEndsForMarkDuplicates> list) {
        ReadEndsForMarkDuplicates representative = null;
        for (final ReadEndsForMarkDuplicates candidate : list) {
            // Strict comparison keeps the first entry among equally scored ones.
            if (representative == null || candidate.score > representative.score) {
                representative = candidate;
            }
        }
        if (representative == null) {
            // Empty set: there is nothing to register.
            return;
        }

        final int setSize = list.size();
        final long representativeIndex = representative.read1IndexInFile;
        for (final ReadEndsForMarkDuplicates member : list) {
            addRepresentativeReadOfDuplicateSet(representativeIndex, setSize, member.read1IndexInFile);
            addRepresentativeReadOfDuplicateSet(representativeIndex, setSize, member.read2IndexInFile);
        }
    }

    /**
     * Marks every entry of a pair chunk except the highest-scoring one as a duplicate.
     *
     * <p>The earliest entry wins score ties. When {@code READ_NAME_REGEX} is set, optical
     * duplicate tracking is run over the chunk before any index is recorded. For each
     * non-retained entry the read-1 index is recorded, and the read-2 index too when it
     * differs. Entries flagged as optical duplicates additionally have both indexes added to
     * {@code opticalDuplicateIndexes} when that collection exists.
     *
     * @param list entries that are comparable for duplicate purposes
     */
    private void markDuplicatePairs(List<ReadEndsForMarkDuplicates> list) {
        ReadEndsForMarkDuplicates best = null;
        for (final ReadEndsForMarkDuplicates candidate : list) {
            if (best == null || candidate.score > best.score) {
                best = candidate;
            }
        }

        if (this.READ_NAME_REGEX != null) {
            AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(list, best, this.opticalDuplicateFinder, this.libraryIdGenerator);
        }

        for (final ReadEndsForMarkDuplicates end : list) {
            if (end == best) {
                // The retained entry is never recorded as a duplicate.
                continue;
            }
            addIndexAsDuplicate(end.read1IndexInFile);
            if (end.read2IndexInFile != end.read1IndexInFile) {
                addIndexAsDuplicate(end.read2IndexInFile);
            }
            if (end.isOpticalDuplicate) {
                if (this.opticalDuplicateIndexes != null) {
                    this.opticalDuplicateIndexes.add(end.read1IndexInFile);
                    this.opticalDuplicateIndexes.add(end.read2IndexInFile);
                }
            }
        }
    }

    /**
     * Records duplicate indexes for one chunk of fragment entries.
     *
     * <p>If the chunk contains paired entries, every unpaired entry is recorded as a duplicate.
     * Otherwise the highest-scoring entry is kept (the earliest one wins ties) and every other
     * entry is recorded.
     *
     * @param list entries that are comparable for duplicate purposes
     * @param z    true when the chunk contains at least one paired entry
     */
    private void markDuplicateFragments(List<ReadEndsForMarkDuplicates> list, boolean z) {
        if (!z) {
            // Fragments only: keep the best-scoring one and mark the rest.
            ReadEndsForMarkDuplicates best = null;
            for (final ReadEndsForMarkDuplicates candidate : list) {
                if (best == null || candidate.score > best.score) {
                    best = candidate;
                }
            }
            for (final ReadEndsForMarkDuplicates end : list) {
                if (end != best) {
                    addIndexAsDuplicate(end.read1IndexInFile);
                }
            }
        } else {
            // Pairs are present: every unpaired entry in the chunk is a duplicate.
            for (final ReadEndsForMarkDuplicates end : list) {
                if (!end.isPaired()) {
                    addIndexAsDuplicate(end.read1IndexInFile);
                }
            }
        }
    }
}
