package picard.vcf;

import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.liftover.LiftOver;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.StringUtil;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.vcf.VCFRecordCodec;
import java.io.File;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.programgroups.VcfOrBcf;
import picard.util.IlluminaUtil;
import picard.util.LiftoverUtils;

/**
 * Command-line tool that lifts the variants of a VCF/BCF over to a different reference build.
 *
 * <p>Every input record is mapped through the supplied chain file. Records that map cleanly and
 * whose reference allele agrees with the target reference sequence are collected in a
 * {@link SortingCollection} and written, sorted, to {@code OUTPUT}. All other records are written
 * to {@code REJECT} with one of the {@code FILTER_*} filters applied.
 */
@CommandLineProgramProperties(
        summary = LiftoverVcf.USAGE_SUMMARY + LiftoverVcf.USAGE_DETAILS,
        oneLineSummary = LiftoverVcf.USAGE_SUMMARY,
        programGroup = VcfOrBcf.class)
@DocumentedFeature
public class LiftoverVcf extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Lifts over a VCF file from one reference build to another.  ";
    static final String USAGE_DETAILS =
            "This tool adjusts the coordinates of variants within a VCF file to match a new reference. "
            + "The output file will be sorted and indexed using the target reference build. "
            + "To be clear, REFERENCE_SEQUENCE should be the <em>target</em> reference build. "
            + "The tool is based on the UCSC liftOver tool (see: http://genome.ucsc.edu/cgi-bin/hgLiftOver) "
            + "and uses a UCSC chain file to guide its operation. <br /><br />"
            + "Note that records may be rejected because they cannot be lifted over or because of sequence "
            + "incompatibilities between the source and target reference genomes.  "
            + "Rejected records will be emitted with filters to the REJECT file, using the source genome coordinates.<br />"
            + "<h4>Usage example:</h4>"
            + "<pre>java -jar picard.jar LiftoverVcf \\<br />"
            + "     I=input.vcf \\<br />"
            + "     O=lifted_over.vcf \\<br />"
            + "     CHAIN=b37tohg19.chain \\<br />"
            + "     REJECT=rejected_variants.vcf \\<br />"
            + "     R=reference_sequence.fasta</pre>"
            + "For additional information, please see: http://genome.ucsc.edu/cgi-bin/hgLiftOver"
            + "<hr />";

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input VCF/BCF file to be lifted over.")
    public File INPUT;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output location to write the lifted over VCF/BCF to.")
    public File OUTPUT;

    @Argument(shortName = "C", doc = "The liftover chain file. See https://genome.ucsc.edu/goldenPath/help/chain.html for a description of chain files.  See http://hgdownload.soe.ucsc.edu/downloads.html#terms for where to download chain files.")
    public File CHAIN;

    @Argument(doc = "File to which to write rejected records.")
    public File REJECT;

    /** INFO key added to rejected records whose reference allele mismatched after liftover. */
    public static final String ATTEMPTED_LOCUS = "AttemptedLocus";

    /** Writer for the REJECT file; non-null only while {@link #doWork()} is running. */
    private VariantContextWriter rejects;

    /** Collects successfully lifted records so they can be emitted in sorted order. */
    private SortingCollection<VariantContext> sorter;

    /** Exit code returned when a lifted contig is absent from the target reference and WARN_ON_MISSING_CONTIG is false. */
    public static int EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE = 1;

    /** Filter name used for indels that fall into a reverse complemented region in the target genome. */
    public static final String FILTER_CANNOT_LIFTOVER_INDEL = "ReverseComplementedIndel";

    /** Filter name used for records for which no target could be found. */
    public static final String FILTER_NO_TARGET = "NoTarget";

    /** Filter name used for records whose reference allele differs from the target reference after liftover. */
    public static final String FILTER_MISMATCHING_REF_ALLELE = "MismatchedRefAllele";

    /** Filter name used when the lifted interval length differs from the reference allele length. */
    public static final String FILTER_INDEL_STRADDLES_TWO_INTERVALS = "IndelStraddlesMultipleIntevals";

    /** FILTER header lines added to the header of the REJECT file. */
    private static final List<VCFFilterHeaderLine> FILTERS = CollectionUtil.makeList(
            new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_INDEL, "Indel falls into a reverse complemented region in the target genome."),
            new VCFFilterHeaderLine(FILTER_NO_TARGET, "Variant could not be lifted between genome builds."),
            new VCFFilterHeaderLine(FILTER_MISMATCHING_REF_ALLELE, "Reference allele does not match reference genome sequence after liftover."),
            new VCFFilterHeaderLine(FILTER_INDEL_STRADDLES_TWO_INTERVALS, "Indel is straddling multiple intervalss in the chain, and so the results are not well defined."));

    /** INFO key holding the source contig name. */
    public static final String ORIGINAL_CONTIG = "OriginalContig";

    /** INFO key holding the source start position. */
    public static final String ORIGINAL_START = "OriginalStart";

    /** INFO header lines added to the output header when WRITE_ORIGINAL_POSITION is set. */
    private static final List<VCFInfoHeaderLine> ATTRS = CollectionUtil.makeList(
            new VCFInfoHeaderLine(ORIGINAL_CONTIG, 1, VCFHeaderLineType.String, "The name of the source contig/chromosome prior to liftover."),
            new VCFInfoHeaderLine(ORIGINAL_START, 1, VCFHeaderLineType.String, "The position of the variant on the source contig prior to liftover."));

    @Argument(shortName = "WMC", doc = "Warn on missing contig.", optional = true)
    public boolean WARN_ON_MISSING_CONTIG = false;

    @Argument(doc = "Write the original contig/position for lifted variants to the INFO field.", optional = true)
    public boolean WRITE_ORIGINAL_POSITION = false;

    @Argument(doc = "The minimum percent match required for a variant to be lifted.", optional = true)
    public double LIFTOVER_MIN_MATCH = 1.0d;

    @Argument(doc = "Allow INFO and FORMAT in the records that are not found in the header", optional = true)
    public boolean ALLOW_MISSING_FIELDS_IN_HEADER = false;

    private final Log log = Log.getInstance(LiftoverVcf.class);

    /** Number of records rejected via {@link #rejectVariant(VariantContext, String)}. */
    private long failedLiftover = 0;

    /** Number of records that lifted over but whose reference allele did not match the target reference. */
    private long failedAlleleCheck = 0;

    /**
     * Replaces the default reference argument so that its documentation states that the
     * reference is the TARGET build.
     *
     * @return an argument collection exposing a non-common REFERENCE_SEQUENCE argument
     */
    @Override
    protected ReferenceArgumentCollection makeReferenceArgumentCollection() {
        return new ReferenceArgumentCollection() {
            @Argument(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, common = false, doc = "The reference sequence (fasta) for the TARGET genome build.  The fasta file must have an accompanying sequence dictionary (.dict file).")
            public File REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA;

            @Override
            public File getReferenceFile() {
                return REFERENCE_SEQUENCE;
            }
        };
    }

    /**
     * Program entry point.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        new LiftoverVcf().instanceMainWithExit(strArr);
    }

    /**
     * Lifts every record of INPUT over to the target reference, writing successes (sorted) to
     * OUTPUT and failures to REJECT.
     *
     * @return 0 on success, or {@link #EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE} when a record lifts
     *         to a contig that is not in the target reference and WARN_ON_MISSING_CONTIG is false
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        IOUtil.assertFileIsReadable(CHAIN);
        IOUtil.assertFileIsWritable(OUTPUT);
        IOUtil.assertFileIsWritable(REJECT);

        final LiftOver liftOver = new LiftOver(CHAIN);
        VCFFileReader in = new VCFFileReader(INPUT, false);
        VariantContextWriter out = null;

        try {
            log.info("Loading up the target reference genome.");
            final HashMap<String, ReferenceSequence> refSeqs = new HashMap<>();
            final VCFHeader outHeader = new VCFHeader(in.getFileHeader());

            // Everything that needs the walker (sequences and dictionary) is done before it is
            // closed, rather than querying the walker after close.
            final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
            try {
                for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) {
                    refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()));
                }
                outHeader.setSequenceDictionary(walker.getSequenceDictionary());
                if (WRITE_ORIGINAL_POSITION) {
                    for (final VCFInfoHeaderLine line : ATTRS) {
                        outHeader.addMetaDataLine(line);
                    }
                }
                out = new VariantContextWriterBuilder()
                        .setOption(Options.INDEX_ON_THE_FLY)
                        .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
                        .setOutputFile(OUTPUT)
                        .setReferenceDictionary(walker.getSequenceDictionary())
                        .build();
            } finally {
                CloserUtil.close(walker);
            }
            out.writeHeader(outHeader);

            rejects = new VariantContextWriterBuilder()
                    .setOutputFile(REJECT)
                    .unsetOption(Options.INDEX_ON_THE_FLY)
                    .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
                    .build();
            rejects.writeHeader(buildRejectHeader(in.getFileHeader()));

            long total = 0;
            log.info("Lifting variants over and sorting.");
            sorter = SortingCollection.newInstance(
                    VariantContext.class,
                    new VCFRecordCodec(outHeader, ALLOW_MISSING_FIELDS_IN_HEADER || VALIDATION_STRINGENCY != ValidationStringency.STRICT),
                    outHeader.getVCFRecordComparator(),
                    MAX_RECORDS_IN_RAM.intValue(),
                    TMP_DIR);

            final ProgressLogger progress = new ProgressLogger(log, 1000000, "read");
            for (final VariantContext ctx : in) {
                ++total;
                // NOTE(review): the interval name uses IlluminaUtil.BARCODE_DELIMITER as the
                // start/end separator - presumably "-"; confirm against IlluminaUtil.
                final Interval source = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false,
                        ctx.getContig() + ":" + ctx.getStart() + IlluminaUtil.BARCODE_DELIMITER + ctx.getEnd());
                final Interval target = liftOver.liftOver(source, LIFTOVER_MIN_MATCH);

                // The two early rejections below intentionally skip the progress logger,
                // exactly as before.
                if (target == null) {
                    rejectVariant(ctx, FILTER_NO_TARGET);
                    continue;
                }
                if (ctx.getReference().length() != target.length()) {
                    rejectVariant(ctx, FILTER_INDEL_STRADDLES_TWO_INTERVALS);
                    continue;
                }

                if (target.isNegativeStrand() && (ctx.isMixed() || (ctx.isIndel() && !ctx.isBiallelic()))) {
                    rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_INDEL);
                } else if (!refSeqs.containsKey(target.getContig())) {
                    rejectVariant(ctx, FILTER_NO_TARGET);
                    final String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference.";
                    if (!WARN_ON_MISSING_CONTIG) {
                        log.error(missingContigMessage);
                        // The finally block below flushes/closes everything opened so far.
                        return EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE;
                    }
                    log.warn(missingContigMessage);
                } else {
                    final ReferenceSequence refSeq = refSeqs.get(target.getContig());
                    final VariantContext lifted = LiftoverUtils.liftVariant(ctx, target, refSeq, WRITE_ORIGINAL_POSITION);
                    if (lifted == null) {
                        throw new IllegalArgumentException("Unexpectedly found null VC. This should have not happened.");
                    }
                    tryToAddVariant(lifted, refSeq, ctx);
                }
                progress.record(ctx.getContig(), ctx.getStart());
            }

            logLiftoverStats(total);

            // Close explicitly on the success path so that close failures propagate; the
            // references are nulled so the finally block does not close them a second time.
            rejects.close();
            rejects = null;
            in.close();
            in = null;

            sorter.doneAdding();
            final ProgressLogger writeProgress = new ProgressLogger(log, 1000000, "written");
            log.info("Writing out sorted records to final VCF.");
            for (final VariantContext sorted : sorter) {
                out.add(sorted);
                writeProgress.record(sorted.getContig(), sorted.getStart());
            }
            out.close();
            out = null;
            return 0;
        } finally {
            // Safety net for the early-exit and exception paths: release whatever is still open.
            if (rejects != null) {
                CloserUtil.close(rejects);
                rejects = null;
            }
            if (out != null) {
                CloserUtil.close(out);
            }
            if (in != null) {
                CloserUtil.close(in);
            }
            if (sorter != null) {
                sorter.cleanup();
            }
        }
    }

    /**
     * Builds the header of the REJECT file: a copy of the input header plus all
     * {@link #FILTERS} lines and the {@link #ATTEMPTED_LOCUS} INFO line.
     *
     * @param inputHeader header of the input VCF
     * @return the header to write to the REJECT file
     */
    private static VCFHeader buildRejectHeader(final VCFHeader inputHeader) {
        final VCFHeader rejectHeader = new VCFHeader(inputHeader);
        for (final VCFFilterHeaderLine line : FILTERS) {
            rejectHeader.addMetaDataLine(line);
        }
        rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS, 1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to mismatching alleles."));
        return rejectHeader;
    }

    /**
     * Logs the summary counters gathered while lifting.
     *
     * @param total number of input records read
     */
    private void logLiftoverStats(final long total) {
        final String pct = formatFailureRate(failedLiftover + failedAlleleCheck, total);
        log.info("Processed ", total, " variants.");
        log.info(failedLiftover, " variants failed to liftover.");
        log.info(failedAlleleCheck, " variants lifted over but had mismatching reference alleles after lift over.");
        log.info(pct, " of variants were not successfully lifted over and written to the output.");
    }

    /**
     * Formats {@code failed / total} as a percentage with four decimals.
     *
     * <p>The ratio is computed in floating point (integer division would truncate it to 0 or 1),
     * and an empty input is reported as 0% rather than dividing by zero.
     *
     * @param failed number of records that were not lifted over
     * @param total  number of records processed
     * @return the formatted percentage, e.g. {@code "12.5000%"}
     */
    private static String formatFailureRate(final long failed, final long total) {
        final double ratio = total == 0 ? 0.0d : failed / (double) total;
        return new DecimalFormat("0.0000%").format(ratio);
    }

    /**
     * Writes a record to the REJECT file with the given filter applied and counts it as a
     * failed liftover.
     *
     * @param variantContext the source record being rejected
     * @param str            the filter name to apply
     */
    private void rejectVariant(final VariantContext variantContext, final String str) {
        rejects.add(new VariantContextBuilder(variantContext).filter(str).make());
        failedLiftover++;
    }

    /**
     * Adds a lifted record to the sorter if its reference allele matches (case-insensitively) the
     * target reference bases at the lifted position; otherwise writes the source record to the
     * REJECT file with {@link #FILTER_MISMATCHING_REF_ALLELE} and the attempted target locus.
     *
     * @param lifted the record in target coordinates
     * @param refSeq the target reference sequence for the record's contig
     * @param source the original record in source coordinates
     * @throws IllegalStateException if {@code refSeq} is not the contig of {@code lifted}
     */
    private void tryToAddVariant(final VariantContext lifted, final ReferenceSequence refSeq, final VariantContext source) {
        if (!refSeq.getName().equals(lifted.getContig())) {
            throw new IllegalStateException("The contig of the VariantContext, " + lifted.getContig() + ", doesnt match the ReferenceSequence: " + refSeq.getName());
        }

        boolean mismatchesReference = false;
        for (final Allele allele : lifted.getAlleles()) {
            if (!allele.isReference()) {
                continue;
            }
            // VCF positions are 1-based; the reference byte array is 0-based.
            final String refString = StringUtil.bytesToString(
                    refSeq.getBases(), lifted.getStart() - 1, lifted.getEnd() - lifted.getStart() + 1);
            if (!refString.equalsIgnoreCase(allele.getBaseString())) {
                mismatchesReference = true;
                break;
            }
        }

        if (mismatchesReference) {
            rejects.add(new VariantContextBuilder(source)
                    .filter(FILTER_MISMATCHING_REF_ALLELE)
                    .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", lifted.getContig(), lifted.getStart(), lifted.getEnd()))
                    .make());
            failedAlleleCheck++;
        } else {
            sorter.add(lifted);
        }
    }
}
