package picard.sam.markduplicates;

import htsjdk.samtools.DuplicateSet;
import htsjdk.samtools.DuplicateSetIterator;
import htsjdk.samtools.SAMRecordDuplicateComparator;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.Log;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Alpha;
import picard.sam.markduplicates.util.AbstractMarkDuplicatesCommandLineProgram;

/**
 * Command-line program that marks duplicate reads using both read positions and UMIs.
 *
 * <p>This class only contributes the UMI-related options and overrides
 * {@link #getDuplicateSetIterator} so that the duplicate sets produced by a plain
 * {@link DuplicateSetIterator} are passed through a {@code UmiAwareDuplicateSetIterator}.
 * Everything else is inherited from {@link SimpleMarkDuplicatesWithMateCigar}.
 */
@CommandLineProgramProperties(
        // Built from the constants below so the help text has a single source of truth.
        usage = UmiAwareMarkDuplicatesWithMateCigar.USAGE_SUMMARY + UmiAwareMarkDuplicatesWithMateCigar.USAGE_DETAILS,
        usageShort = UmiAwareMarkDuplicatesWithMateCigar.USAGE_SUMMARY,
        programGroup = Alpha.class)
public class UmiAwareMarkDuplicatesWithMateCigar extends SimpleMarkDuplicatesWithMateCigar {
    /** One-sentence description of the tool; also used as the short usage text. */
    static final String USAGE_SUMMARY = "Identifies duplicate reads using information from read positions and UMIs. "
            + "All records are then written to the output file with the duplicate records flagged.";

    /** Long-form (HTML) description appended to {@link #USAGE_SUMMARY} in the full usage text. */
    static final String USAGE_DETAILS = "<p>UmiAwareMarkDuplicatesWithMateCigar locates and tags duplicate reads in a BAM or SAM file, "
            + "where duplicate reads are defined as originating from a single fragment of DNA. </p>"
            + "<p>This tool identifies a duplicate set by assuming that all members of a duplicate set must have the same start and end position, "
            + "and must also have sufficiently similar UMIs.  Sufficiently similar is parameterized by MAX_EDIT_DISTANCE_TO_JOIN which indicates "
            + "the edit distance between UMIs that shall be considered to be part of the same original molecule.</p>"
            + "<p>This tool is not intended to be used on data without UMIs, see MarkDuplicates for marking duplicates that "
            + "do not have UMIs.</p>";

    /** Edit-distance threshold handed to the UMI-aware iterator; defaults to 1. */
    @Option(shortName = "MAX_EDIT_DISTANCE_TO_JOIN", doc = "Largest edit distance that UMIs may have in order to be considered as coming from the same source molecule.", optional = true)
    public int MAX_EDIT_DISTANCE_TO_JOIN = 1;

    /** Name of the record tag that carries the UMI; defaults to "RX". */
    @Option(shortName = "UMI_TAG_NAME", doc = "Tag name to use for UMI", optional = true)
    public String UMI_TAG_NAME = "RX";

    /** Name of the record tag used for the assigned UMI; defaults to "MI". */
    @Option(shortName = "ASSIGNED_UMI_TAG", doc = "Tag name to use for assigned UMI", optional = true)
    public String ASSIGNED_UMI_TAG = "MI";

    /** Whether records without a UMI are tolerated; off by default and documented as test-only. */
    @Option(doc = "Allow for missing UMIs if data doesn't have UMIs.  This option is intended to be used only for testing the code.  Use SimpleMarkDuplicatesWithMateCigar if data has missing UMIs.", optional = true)
    public boolean ALLOW_MISSING_UMIS = false;

    private final Log log = Log.getInstance(UmiAwareMarkDuplicatesWithMateCigar.class);

    /**
     * Builds the iterator over duplicate sets used by this tool.
     *
     * <p>A {@link DuplicateSetIterator} is created over the supplied records and header and then
     * wrapped in a {@code UmiAwareDuplicateSetIterator} configured with this tool's UMI options.
     *
     * @param samHeaderAndIterator the input header and record iterator
     * @param sAMRecordDuplicateComparator comparator passed through to the underlying {@link DuplicateSetIterator}
     * @return an iterator of duplicate sets produced by the UMI-aware wrapper
     */
    @Override
    protected CloseableIterator<DuplicateSet> getDuplicateSetIterator(AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator samHeaderAndIterator, SAMRecordDuplicateComparator sAMRecordDuplicateComparator) {
        // NOTE(review): the literal `false` is presumably DuplicateSetIterator's "pre-sorted" flag — confirm against htsjdk.
        final DuplicateSetIterator positionalDuplicateSets = new DuplicateSetIterator(
                samHeaderAndIterator.iterator,
                samHeaderAndIterator.header,
                false,
                sAMRecordDuplicateComparator);

        return new UmiAwareDuplicateSetIterator(
                positionalDuplicateSets,
                this.MAX_EDIT_DISTANCE_TO_JOIN,
                this.UMI_TAG_NAME,
                this.ASSIGNED_UMI_TAG,
                this.ALLOW_MISSING_UMIS);
    }
}
