package picard.util;

import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.variant.vcf.VCFFileReader;
import java.io.File;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineParser;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.IntervalsManipulationProgramGroup;
import picard.util.IntervalListScatterer;

@CommandLineProgramProperties(summary = "A tool for performing various IntervalList manipulations <h3>Summary</h3>This tool offers multiple interval list file manipulation capabilities, including: sorting, merging, subtracting, padding, and other set-theoretic operations. The default action is to merge and sort the intervals provided in the INPUTs. Other options, e.g. interval subtraction, are controlled by the arguments.<br />Both IntervalList and VCF files are accepted as input. IntervalList should be denoted with the extension .interval_list, while a VCF must have one of .vcf, .vcf.gz, .bcf When VCF file is used as input, each variant is translated into an using its reference allele or the END INFO annotation (if present) to determine the extent of the interval. \nIntervalListTools can also \"scatter\" the resulting interval-list into many interval-files. This can be useful for creating multiple interval lists for scattering an analysis over.\n\n <h3>Details</h3> The IntervalList file format is designed to help the users avoid mixing references when supplying intervals and other genomic data to a single tool. A SAM style header must be present at the top of the file. After the header, the file then contains records, one per line in text format with the followingvalues tab-separated: \n\n - Sequence name (SN) \n - Start position (1-based)\n - End position (1-based, inclusive)\n - Strand (either + or -)\n - Interval name (ideally unique names for intervals)\n\nThe coordinate system is 1-based, closed-ended so that the first base in a sequence has position 1, and both the start and the end positions are included in an interval.\n\nExample interval list file<pre>@HD\tVN:1.0\n@SQ\tSN:chr1\tLN:501\n@SQ\tSN:chr2\tLN:401\nchr1\t1\t100\t+\tstarts at the first base of the contig and covers 100 bases\nchr2\t100\t100\t+\tinterval with exactly one base\n</pre>\n\n<h3>Usage Examples</h3><h4>1. 
Combine the intervals from two interval lists:</h4><pre>java -jar picard.jar IntervalListTools \\\n      ACTION=CONCAT \\\n      I=input.interval_list \\\n      I=input_2.interval_list \\\n      O=new.interval_list</pre> <h4>2. Combine the intervals from two interval lists, sorting the resulting in list and merging overlapping and abutting intervals:</h4> <pre> java -jar picard.jar IntervalListTools \\\n       ACTION=CONCAT \\\n       SORT=true \\\n       UNIQUE=true \\\n       I=input.interval_list \\\n       I=input_2.interval_list \\\n       O=new.interval_list </pre> <h4>3. Subtract the intervals in SECOND_INPUT from those in INPUT</h4> <pre> java -jar picard.jar IntervalListTools \\\n       ACTION=SUBTRACT \\\n       I=input.interval_list \\\n       SI=input_2.interval_list \\\n       O=new.interval_list </pre> <h4>4. Find bases that are in either input1.interval_list or input2.interval_list, and also in input3.interval_list:</h4> <pre> java -jar picard.jar IntervalListTools \\\n       ACTION=INTERSECT \\\n       I=input1.interval_list \\\n       I=input2.interval_list \\\n       SI=input3.interval_list \\\n       O=new.interval_list </pre>", oneLineSummary = IntervalListTools.USAGE_SUMMARY, programGroup = IntervalsManipulationProgramGroup.class)
@DocumentedFeature
/* loaded from: input_file:picard/util/IntervalListTools.class */
public class IntervalListTools extends CommandLineProgram {
    static final String USAGE_SUMMARY = "A tool for performing various IntervalList manipulations";
    static final String USAGE_DETAILS = " <h3>Summary</h3>This tool offers multiple interval list file manipulation capabilities, including: sorting, merging, subtracting, padding, and other set-theoretic operations. The default action is to merge and sort the intervals provided in the INPUTs. Other options, e.g. interval subtraction, are controlled by the arguments.<br />Both IntervalList and VCF files are accepted as input. IntervalList should be denoted with the extension .interval_list, while a VCF must have one of .vcf, .vcf.gz, .bcf When VCF file is used as input, each variant is translated into an using its reference allele or the END INFO annotation (if present) to determine the extent of the interval. \nIntervalListTools can also \"scatter\" the resulting interval-list into many interval-files. This can be useful for creating multiple interval lists for scattering an analysis over.\n\n <h3>Details</h3> The IntervalList file format is designed to help the users avoid mixing references when supplying intervals and other genomic data to a single tool. A SAM style header must be present at the top of the file. After the header, the file then contains records, one per line in text format with the followingvalues tab-separated: \n\n - Sequence name (SN) \n - Start position (1-based)\n - End position (1-based, inclusive)\n - Strand (either + or -)\n - Interval name (ideally unique names for intervals)\n\nThe coordinate system is 1-based, closed-ended so that the first base in a sequence has position 1, and both the start and the end positions are included in an interval.\n\nExample interval list file<pre>@HD\tVN:1.0\n@SQ\tSN:chr1\tLN:501\n@SQ\tSN:chr2\tLN:401\nchr1\t1\t100\t+\tstarts at the first base of the contig and covers 100 bases\nchr2\t100\t100\t+\tinterval with exactly one base\n</pre>\n\n<h3>Usage Examples</h3><h4>1. 
Combine the intervals from two interval lists:</h4><pre>java -jar picard.jar IntervalListTools \\\n      ACTION=CONCAT \\\n      I=input.interval_list \\\n      I=input_2.interval_list \\\n      O=new.interval_list</pre> <h4>2. Combine the intervals from two interval lists, sorting the resulting in list and merging overlapping and abutting intervals:</h4> <pre> java -jar picard.jar IntervalListTools \\\n       ACTION=CONCAT \\\n       SORT=true \\\n       UNIQUE=true \\\n       I=input.interval_list \\\n       I=input_2.interval_list \\\n       O=new.interval_list </pre> <h4>3. Subtract the intervals in SECOND_INPUT from those in INPUT</h4> <pre> java -jar picard.jar IntervalListTools \\\n       ACTION=SUBTRACT \\\n       I=input.interval_list \\\n       SI=input_2.interval_list \\\n       O=new.interval_list </pre> <h4>4. Find bases that are in either input1.interval_list or input2.interval_list, and also in input3.interval_list:</h4> <pre> java -jar picard.jar IntervalListTools \\\n       ACTION=INTERSECT \\\n       I=input1.interval_list \\\n       I=input2.interval_list \\\n       SI=input3.interval_list \\\n       O=new.interval_list </pre>";

    // ---- Command-line arguments -------------------------------------------------------------
    // The doc strings below are user-facing help text.

    /** Files to read intervals from; at least one is required. */
    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "One or more interval lists. If multiple interval lists are provided the output is the result of merging the inputs. Supported formats are interval_list and VCF.", minElements = 1)
    public List<File> INPUT;

    /** Output file (SCATTER_COUNT == 1) or output directory (SCATTER_COUNT > 1); may be left unset. */
    @Argument(doc = "The output interval list file to write (if SCATTER_COUNT == 1) or the directory into which to write the scattered interval sub-directories (if SCATTER_COUNT > 1).", shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional = true)
    public File OUTPUT;

    /** Second operand; only meaningful for actions whose {@code takesSecondInput} flag is true. */
    @Argument(shortName = "SI", doc = "Second set of intervals for the SUBTRACT, SYMDIFF and OVERLAPS actions.", optional = true)
    public List<File> SECOND_INPUT;
    private static final Log LOG = Log.getInstance(IntervalListTools.class);

    /** Padding applied to every opened interval list before the action runs. */
    @Argument(doc = "The amount to pad each end of the intervals by before other operations are undertaken. Negative numbers are allowed and indicate intervals should be shrunk. Resulting intervals < 0 bases long will be removed. Padding is applied to the interval lists (both INPUT and SECOND_INPUT, if provided) <b> before </b> the ACTION is performed.", optional = true)
    public int PADDING = 0;

    /** Whether to merge overlapping/adjacent intervals in the result. */
    @Argument(doc = "If true, merge overlapping and adjacent intervals to create a list of unique intervals. Implies SORT=true.")
    public boolean UNIQUE = false;

    /** Whether to sort the result by coordinate. */
    @Argument(doc = "If true, sort the resulting interval list by coordinate.")
    public boolean SORT = true;

    /** The set operation applied to the inputs. */
    @Argument(doc = "Action to take on inputs.")
    public Action ACTION = Action.CONCAT;

    /** Extra comment lines for the output header; null when none were given. */
    @Argument(doc = "One or more lines of comment to add to the header of the output file (as @CO lines in the SAM header).", optional = true)
    public List<String> COMMENT = null;

    /** Number of scatter outputs requested; values above 1 switch OUTPUT to directory mode. */
    @Argument(doc = "The number of files into which to scatter the resulting list by locus; in some situations, fewer intervals may be emitted.  Note - if > 1, the resultant scattered intervals will be sorted and uniqued.  The sort will be inverted if the INVERT flag is set.")
    public int SCATTER_COUNT = 1;

    /** Passed through to the VCF reader when an input is a VCF. */
    @Argument(doc = "Whether to include filtered variants in the vcf when generating an interval list from vcf.", optional = true)
    public boolean INCLUDE_FILTERED = false;

    /** Band width at which result intervals are broken up; 0 disables breaking. */
    @Argument(shortName = "BRK", doc = "If set to a positive value will create a new interval list with the original intervals broken up at integer multiples of this value. Set to 0 to NOT break up intervals.", optional = true)
    public int BREAK_BANDS_AT_MULTIPLES_OF = 0;

    /** Scattering strategy handed to {@link IntervalListScatterer}. */
    @Argument(shortName = StandardOptionDefinitions.METRICS_FILE_SHORT_NAME, doc = "Selects between various ways in which scattering of the interval-list can happen.")
    public IntervalListScatterer.Mode SUBDIVISION_MODE = IntervalListScatterer.Mode.INTERVAL_SUBDIVISION;

    /** Whether to output the complement of the result instead of the result itself. */
    @Argument(doc = "Produce the inverse list of intervals, that is, the regions in the genome that are <b>not</b> covered by any of the input intervals. Will merge abutting intervals first. Output will be sorted.", optional = true)
    public boolean INVERT = false;

    /* loaded from: input_file:picard/util/IntervalListTools$Action.class */
    /**
     * The set operations this tool can apply to its inputs.
     *
     * <p>Each constant carries the help text shown on the command line and a flag saying whether
     * it needs SECOND_INPUT, and implements {@link #act(List, List)} by delegating to the
     * matching static operation on {@link IntervalList}.
     */
    public enum Action implements CommandLineParser.ClpEnum {
        /** Delegates to {@code IntervalList.concatenate}; the second list is ignored. */
        CONCAT("The concatenation of all the intervals in all the INPUTs, no sorting or merging of overlapping/abutting intervals implied. Will result in a possibly unsorted list unless requested otherwise.", false) {
            @Override
            IntervalList act(List<IntervalList> list, List<IntervalList> list2) {
                return IntervalList.concatenate(list);
            }
        },

        /** Delegates to {@code IntervalList.union}; the second list is ignored. */
        UNION("Like CONCATENATE but with UNIQUE and SORT implied, the result being the set-wise union of all INPUTS, with overlapping and abutting intervals merged into one.", false) {
            @Override
            IntervalList act(List<IntervalList> list, List<IntervalList> list2) {
                return IntervalList.union(list);
            }
        },

        /** Delegates to {@code IntervalList.intersection}; the second list is ignored. */
        INTERSECT("The sorted and merged set of all loci that are contained in all of the INPUTs.", false) {
            @Override
            IntervalList act(List<IntervalList> list, List<IntervalList> list2) {
                return IntervalList.intersection(list);
            }
        },

        /** Delegates to {@code IntervalList.subtract(list, list2)}. */
        SUBTRACT("Subtracts the intervals in SECOND_INPUT from those in INPUT. The resulting loci are those in INPUT that are not in SECOND_INPUT.", true) {
            @Override
            IntervalList act(List<IntervalList> list, List<IntervalList> list2) {
                return IntervalList.subtract(list, list2);
            }
        },

        /** Delegates to {@code IntervalList.difference(list, list2)}. */
        SYMDIFF("Results in loci that are in INPUT or SECOND_INPUT but are not in both.", true) {
            @Override
            IntervalList act(List<IntervalList> list, List<IntervalList> list2) {
                return IntervalList.difference(list, list2);
            }
        },

        /** Delegates to {@code IntervalList.overlaps(list, list2)}. */
        OVERLAPS("Outputs the entire intervals from INPUT that have bases which overlap any interval from SECOND_INPUT. Note that this is different than INTERSECT in that each original interval is either emitted in its entirety, or not at all.", true) {
            @Override
            IntervalList act(List<IntervalList> list, List<IntervalList> list2) {
                return IntervalList.overlaps(list, list2);
            }
        };

        /** Help text returned by {@link #getHelpDoc()}. */
        final String helpdoc;

        /** True when the action needs SECOND_INPUT; consulted by command-line validation. */
        final boolean takesSecondInput;

        Action(String helpText, boolean needsSecondInput) {
            helpdoc = helpText;
            takesSecondInput = needsSecondInput;
        }

        /** @return the help text supplied when the constant was declared */
        public String getHelpDoc() {
            return helpdoc;
        }

        /**
         * Applies this action.
         *
         * @param list  interval lists opened from INPUT
         * @param list2 interval lists opened from SECOND_INPUT; unused by actions that do not
         *              take a second input
         * @return the interval list produced by the underlying {@link IntervalList} operation
         */
        abstract IntervalList act(List<IntervalList> list, List<IntervalList> list2);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:picard/util/IntervalListTools$IntervalListInputType.class */
    /**
     * The input file formats this tool can turn into an {@link IntervalList}, selected by
     * file-name suffix.
     */
    public enum IntervalListInputType {
        /** VCF-family files; reading is delegated to {@code VCFFileReader.fromVcf}. */
        VCF(IOUtil.VCF_EXTENSIONS) {
            @Override
            protected IntervalList getIntervalListInternal(File file, boolean includeFiltered) {
                return VCFFileReader.fromVcf(file, includeFiltered);
            }
        },

        /** Native interval-list files; the {@code includeFiltered} flag is not used for these. */
        INTERVAL_LIST(".interval_list") {
            @Override
            protected IntervalList getIntervalListInternal(File file, boolean includeFiltered) {
                return IntervalList.fromFile(file);
            }
        };

        /** File-name suffixes that select this type. */
        protected final Collection<String> applicableExtensions;

        IntervalListInputType(String... extensions) {
            this.applicableExtensions = CollectionUtil.makeSet(extensions);
        }

        // Typed as Collection<String> (was a raw Collection) so the assignment is checked.
        IntervalListInputType(Collection<String> extensions) {
            this.applicableExtensions = extensions;
        }

        /**
         * Reads {@code file} in this type's format.
         *
         * @param file            the file to read
         * @param includeFiltered forwarded to the VCF reader; ignored for interval lists
         * @return the intervals read from the file
         */
        protected abstract IntervalList getIntervalListInternal(File file, boolean includeFiltered);

        /**
         * Picks the input type whose extension list contains a suffix of the file's name.
         * Types are tried in declaration order and the first match wins.
         *
         * @param file the file whose name is inspected
         * @return the matching input type
         * @throws SAMException if no known extension matches
         */
        static IntervalListInputType forFile(File file) {
            final String fileName = file.getName();
            for (final IntervalListInputType candidate : values()) {
                for (final String extension : candidate.applicableExtensions) {
                    if (fileName.endsWith(extension)) {
                        return candidate;
                    }
                }
            }
            throw new SAMException("Cannot figure out type of file " + file.getAbsolutePath() + " from extension. Current implementation understands the following types: " + Arrays.toString(values()));
        }

        /**
         * Detects the type of {@code file} from its name and reads it.
         *
         * @param file            the file to read
         * @param includeFiltered forwarded to the type-specific reader
         * @return the intervals read from the file
         * @throws SAMException if the file's extension is not recognised
         */
        public static IntervalList getIntervalList(File file, boolean includeFiltered) {
            return forFile(file).getIntervalListInternal(file, includeFiltered);
        }

        /** @return the constant name followed by its extension list, e.g. for error messages */
        @Override
        public String toString() {
            return super.toString() + ": " + this.applicableExtensions.toString();
        }
    }

    /**
     * Runs the tool: opens the inputs, applies {@link #ACTION}, then optionally sorts, inverts,
     * uniques and band-breaks the result, stamps the header with a program record and any
     * COMMENT lines, writes the output (single file or scattered), and logs a summary.
     *
     * <p>Side effects: SORT and UNIQUE are overwritten when SCATTER_COUNT &gt; 1 or INVERT is
     * set, and the header of the action's result is modified in place.
     *
     * @return 0 on success; failures surface as exceptions
     */
    @Override // picard.cmdline.CommandLineProgram
    protected int doWork() {
        // SECOND_INPUT is optional and command-line validation explicitly tolerates it being
        // null, so treat "never set" as "empty" instead of failing when it is iterated.
        final List<File> secondInputFiles = this.SECOND_INPUT != null ? this.SECOND_INPUT : new ArrayList<File>();

        IOUtil.assertFilesAreReadable(this.INPUT);
        IOUtil.assertFilesAreReadable(secondInputFiles);
        if (this.OUTPUT != null) {
            // OUTPUT is a file for a single list and a directory when scattering.
            if (this.SCATTER_COUNT == 1) {
                IOUtil.assertFileIsWritable(this.OUTPUT);
            } else {
                IOUtil.assertDirectoryIsWritable(this.OUTPUT);
            }
        }

        final List<IntervalList> firstLists = openIntervalLists(this.INPUT);
        final List<IntervalList> secondLists = openIntervalLists(secondInputFiles);

        if (this.UNIQUE && !this.SORT) {
            LOG.warn("UNIQUE=true requires sorting but SORT=false was specified.  Results will be sorted.");
        }

        final IntervalList result = this.ACTION.act(firstLists, secondLists);

        if (this.SCATTER_COUNT > 1) {
            this.SORT = true;
            this.UNIQUE = true;
        }
        if (this.INVERT) {
            // Applied after the scatter override, so INVERT wins on SORT.
            this.SORT = false;
            this.UNIQUE = true;
        }

        final IntervalList possiblySorted = this.SORT ? result.sorted() : result;
        final IntervalList possiblyInverted = this.INVERT ? IntervalList.invert(possiblySorted) : possiblySorted;
        List<Interval> finalIntervals = this.UNIQUE ? possiblyInverted.uniqued().getIntervals() : possiblyInverted.getIntervals();
        if (this.BREAK_BANDS_AT_MULTIPLES_OF > 0) {
            finalIntervals = IntervalList.breakIntervalsAtBandMultiples(finalIntervals, this.BREAK_BANDS_AT_MULTIPLES_OF);
        }

        // Record this invocation in the header under the smallest positive integer ID that is
        // not already taken by an existing program record.
        final SAMFileHeader header = result.getHeader();
        final HashSet<String> usedProgramIds = new HashSet<String>();
        for (final SAMProgramRecord existing : header.getProgramRecords()) {
            usedProgramIds.add(existing.getId());
        }
        for (int id = 1; id < Integer.MAX_VALUE; id++) {
            final String candidateId = String.valueOf(id);
            if (!usedProgramIds.contains(candidateId)) {
                final SAMProgramRecord programRecord = new SAMProgramRecord(candidateId);
                programRecord.setCommandLine(getCommandLine());
                programRecord.setProgramName(getClass().getSimpleName());
                header.addProgramRecord(programRecord);
                break;
            }
        }

        if (this.COMMENT != null) {
            for (final String comment : this.COMMENT) {
                header.addComment(comment);
            }
        }

        final IntervalList output = new IntervalList(header);
        for (final Interval interval : finalIntervals) {
            output.add(interval);
        }

        final List<IntervalList> resultIntervals;
        if (this.OUTPUT == null) {
            // Nothing is written; the list is still counted for the summary below.
            resultIntervals = Arrays.asList(output);
        } else if (this.SCATTER_COUNT == 1) {
            output.write(this.OUTPUT);
            resultIntervals = Arrays.asList(output);
        } else {
            final List<IntervalList> scattered = writeScatterIntervals(output);
            LOG.info(String.format("Wrote %s scatter subdirectories to %s.", scattered.size(), this.OUTPUT));
            if (scattered.size() != this.SCATTER_COUNT) {
                LOG.warn(String.format("Requested scatter width of %s, but only emitted %s.  (This may be an expected consequence of running in %s mode.)", this.SCATTER_COUNT, scattered.size(), this.SUBDIVISION_MODE));
            }
            resultIntervals = scattered;
        }

        // Tally over every produced list. The decompiled loop referenced an undefined variable
        // for the interval count; both numbers come from the same list on each iteration.
        long totalUniqueBaseCount = 0;
        long intervalCount = 0;
        for (final IntervalList produced : resultIntervals) {
            totalUniqueBaseCount += produced.getUniqueBaseCount();
            intervalCount += produced.size();
        }
        LOG.info("Produced " + intervalCount + " intervals totalling " + totalUniqueBaseCount + " unique bases.");
        return 0;
    }

    /**
     * Reads every file in {@code list} as an interval list (format chosen from the file name)
     * and pads each one by {@link #PADDING}.
     *
     * <p>If reading or padding a file fails, the file's path is logged at error level and the
     * original exception is rethrown unchanged.
     *
     * @param list the files to open, in the order the results should appear
     * @return the padded interval lists, one per input file
     */
    private List<IntervalList> openIntervalLists(List<File> list) {
        final List<IntervalList> opened = new ArrayList<>();
        for (final File source : list) {
            final IntervalList padded;
            try {
                final IntervalList loaded = IntervalListInputType.getIntervalList(source, this.INCLUDE_FILTERED);
                padded = loaded.padded(this.PADDING);
            } catch (Exception e) {
                LOG.error(new Object[]{"There was a problem opening IntervalList file " + source.getAbsolutePath()});
                throw e;
            }
            opened.add(padded);
        }
        return opened;
    }
    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Checks argument combinations that the annotations alone cannot express.
     *
     * <p>Rejected: a SCATTER_COUNT below 1, a negative BREAK_BANDS_AT_MULTIPLES_OF, and a
     * SECOND_INPUT that is missing for an action that needs it or supplied for one that does not.
     *
     * @return the error messages, or {@code null} when every check passes
     */
    @Override // picard.cmdline.CommandLineProgram
    public String[] customCommandLineValidation() {
        final List<String> errors = new ArrayList<>();

        if (this.SCATTER_COUNT < 1) {
            errors.add("SCATTER_COUNT must be greater than 0.");
        }
        if (this.BREAK_BANDS_AT_MULTIPLES_OF < 0) {
            errors.add("BREAK_BANDS_AT_MULTIPLES_OF must be greater than or equal to 0.");
        }

        // A null list and an empty list both mean "no second input was given".
        final boolean secondInputGiven = this.SECOND_INPUT != null && !this.SECOND_INPUT.isEmpty();
        final boolean secondInputWanted = this.ACTION.takesSecondInput;
        if (secondInputWanted && !secondInputGiven) {
            errors.add("SECOND_INPUT was not provided but action " + this.ACTION + " requires a second input.");
        } else if (secondInputGiven && !secondInputWanted) {
            errors.add("SECOND_INPUT was provided but action " + this.ACTION + " doesn't take a second input.");
        }

        return errors.isEmpty() ? null : errors.toArray(new String[errors.size()]);
    }

    /**
     * Scatters {@code intervalList} with the configured {@link #SUBDIVISION_MODE} and writes
     * each piece to its own sub-directory of {@link #OUTPUT}.
     *
     * <p>Pieces are numbered from 1 using a four-digit, zero-padded index; the directory name
     * also embeds the number of pieces actually produced.
     *
     * @param intervalList the list to scatter
     * @return the scattered pieces, in the order they were written
     */
    private List<IntervalList> writeScatterIntervals(IntervalList intervalList) {
        final IntervalListScatterer scatterer = new IntervalListScatterer(this.SUBDIVISION_MODE);
        final List<IntervalList> pieces = scatterer.scatter(intervalList, this.SCATTER_COUNT, this.UNIQUE);
        final DecimalFormat indexFormat = new DecimalFormat("0000");

        int pieceNumber = 1;
        for (final IntervalList piece : pieces) {
            final String paddedIndex = indexFormat.format(pieceNumber);
            final File destination = createDirectoryAndGetScatterFile(this.OUTPUT, pieces.size(), paddedIndex);
            piece.write(destination);
            pieceNumber++;
        }
        return pieces;
    }

    /**
     * Builds the path of one scattered interval-list file:
     * {@code <file>/temp_<str>_of_<j>/scattered.interval_list}. Nothing is created on disk.
     *
     * @param file the scatter output directory; its absolute path is used
     * @param j    the total number of scattered pieces
     * @param str  the already-formatted index of this piece
     * @return the file the piece should be written to
     */
    public static File getScatteredFileName(File file, long j, String str) {
        final String pieceDirectory = file.getAbsolutePath() + "/temp_" + str + "_of_" + j;
        return new File(pieceDirectory + "/scattered.interval_list");
    }

    /**
     * Ensures both the scatter output directory and the per-piece sub-directory exist, then
     * returns the file inside it that the piece should be written to.
     *
     * @param file the scatter output directory
     * @param j    the total number of scattered pieces
     * @param str  the already-formatted index of this piece
     * @return the target file, whose parent directory now exists
     */
    private static File createDirectoryAndGetScatterFile(File file, long j, String str) {
        createDirectoryOrFail(file);
        final File target = getScatteredFileName(file, j, str);
        final File pieceDirectory = target.getParentFile();
        createDirectoryOrFail(pieceDirectory);
        return target;
    }

    /**
     * Makes sure {@code file} is a directory, creating it (one level only) when it is missing.
     *
     * @param file the directory that must exist on return
     * @throws PicardException if the path cannot be created, or if it already exists but is
     *                         not a directory
     */
    private static void createDirectoryOrFail(File file) {
        if (file.isDirectory()) {
            return;
        }
        // mkdir() also returns false when the directory appeared between the check above and
        // this call, so re-check before failing. A path that exists as a regular file ends up
        // here too and is reported instead of being silently accepted.
        if (!file.mkdir() && !file.isDirectory()) {
            throw new PicardException("Unable to create directory: " + file.getAbsolutePath());
        }
    }
}
