/*
 * Decompiled with CFR 0.152.
 */
package org.seqdoop.hadoop_bam;

import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.index.Block;
import htsjdk.tribble.index.tabix.TabixIndex;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.seqdoop.hadoop_bam.BCFRecordReader;
import org.seqdoop.hadoop_bam.BCFSplitGuesser;
import org.seqdoop.hadoop_bam.FileVirtualSplit;
import org.seqdoop.hadoop_bam.VCFFormat;
import org.seqdoop.hadoop_bam.VCFRecordReader;
import org.seqdoop.hadoop_bam.VariantContextWritable;
import org.seqdoop.hadoop_bam.util.BGZFCodec;
import org.seqdoop.hadoop_bam.util.BGZFEnhancedGzipCodec;
import org.seqdoop.hadoop_bam.util.WrapSeekable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link FileInputFormat} producing {@link LongWritable} keys and
 * {@link VariantContextWritable} values from VCF and BCF files.
 *
 * <p>The format of each input path is looked up through {@link #getFormat(Path)}, which
 * selects between {@link VCFRecordReader} and {@link BCFRecordReader}. Splits of BCF files
 * are re-aligned with a {@link BCFSplitGuesser}, and all splits may optionally be filtered
 * against the intervals stored under {@link #INTERVALS_PROPERTY} using a {@code .tbi} index
 * located next to each input file.
 */
public class VCFInputFormat
extends FileInputFormat<LongWritable, VariantContextWritable> {
    /** Class-wide SLF4J logger. */
    private static final Logger logger = LoggerFactory.getLogger(VCFInputFormat.class);
    /**
     * Boolean configuration key read by the {@link Configuration}-taking constructor
     * (default {@code true}); controls whether file names are consulted before file contents
     * when determining a file's format.
     */
    public static final String TRUST_EXTS_PROPERTY = "hadoopbam.vcf.trust-exts";
    /**
     * Configuration key holding a comma-separated list of {@code contig:start-end} intervals,
     * as written by {@code setIntervals} and read back by {@code getIntervals}.
     */
    public static final String INTERVALS_PROPERTY = "hadoopbam.vcf.intervals";
    /**
     * Path-to-format lookup table. Either supplied by the caller or filled in lazily by
     * {@code getFormat}; a {@code null} value records a path whose format could not be
     * determined.
     */
    private final Map<Path, VCFFormat> formatMap;
    /** True when {@link #formatMap} was supplied by the caller; unknown paths are then rejected. */
    private final boolean givenMap;
    /** Configuration used for file-system access; may be null until a job/task context supplies one. */
    private Configuration conf;
    /** When true, {@code getFormat} tries {@code VCFFormat.inferFromFilePath} before reading file data. */
    private boolean trustExts;

    /**
     * Stores the given intervals in the configuration under {@link #INTERVALS_PROPERTY}.
     *
     * <p>Each interval is rendered as {@code contig:start-end} and the entries are joined
     * with commas; an empty list yields an empty string.
     *
     * @param conf      the configuration to write to
     * @param intervals the intervals to encode
     * @param <T>       the concrete {@link Locatable} type
     */
    public static <T extends Locatable> void setIntervals(Configuration conf, List<T> intervals) {
        StringBuilder encoded = new StringBuilder();
        // Empty before the first entry, a comma before every later one.
        String separator = "";
        for (T interval : intervals) {
            encoded.append(separator);
            encoded.append(String.format("%s:%d-%d", interval.getContig(), interval.getStart(), interval.getEnd()));
            separator = ",";
        }
        conf.set(INTERVALS_PROPERTY, encoded.toString());
    }

    /**
     * Reads back the intervals stored under {@link #INTERVALS_PROPERTY}.
     *
     * <p>The property is a comma-separated list of {@code contig:start-end} entries. Each
     * entry is split at its <em>last</em> colon, and the coordinates are split at the first
     * dash that follows the start value, so contig names that themselves contain {@code ':'}
     * or {@code '-'} are parsed correctly.
     *
     * @param conf the configuration to read from
     * @return {@code null} if the property is not set, an empty list if it is set to the
     *         empty string, otherwise the decoded intervals in their original order
     * @throws IllegalArgumentException if an entry is not of the form {@code contig:start-end}
     *         (a non-numeric coordinate surfaces as {@link NumberFormatException})
     */
    static List<Interval> getIntervals(Configuration conf) {
        String intervalsProperty = conf.get(INTERVALS_PROPERTY);
        if (intervalsProperty == null) {
            return null;
        }
        List<Interval> intervals = new ArrayList<Interval>();
        if (intervalsProperty.isEmpty()) {
            // An empty interval list is encoded as "": nothing to decode.
            return intervals;
        }
        for (String s : intervalsProperty.split(",")) {
            int colon = s.lastIndexOf(':');
            // Skip at least one character of the start value so that a leading sign on the
            // start coordinate is not mistaken for the start/end separator.
            int dash = colon < 0 ? -1 : s.indexOf('-', colon + 2);
            if (colon <= 0 || dash < 0 || dash == s.length() - 1) {
                throw new IllegalArgumentException("Malformed interval '" + s + "' in " + INTERVALS_PROPERTY
                        + ": expected contig:start-end");
            }
            String contig = s.substring(0, colon);
            int start = Integer.parseInt(s.substring(colon + 1, dash));
            int end = Integer.parseInt(s.substring(dash + 1));
            intervals.add(new Interval(contig, start, end));
        }
        return intervals;
    }

    /**
     * Creates an input format with an empty format cache and no configuration.
     *
     * <p>The configuration is taken later from the first context handed to
     * {@code createRecordReader} or {@code getSplits}; until then {@code getFormat} throws
     * {@link IllegalStateException} for paths that are not cached.
     */
    public VCFInputFormat() {
        this.conf = null;
        this.givenMap = false;
        this.formatMap = new HashMap<>();
    }

    /**
     * Creates an input format bound to the given configuration, with an empty format cache.
     *
     * <p>Whether file names are trusted for format detection is read from
     * {@link #TRUST_EXTS_PROPERTY}, defaulting to {@code true}.
     *
     * @param conf the configuration to use for file-system access and settings
     */
    public VCFInputFormat(Configuration conf) {
        this.givenMap = false;
        this.formatMap = new HashMap<>();
        this.trustExts = conf.getBoolean(TRUST_EXTS_PROPERTY, true);
        this.conf = conf;
    }

    /**
     * Creates an input format that resolves formats exclusively through the supplied map.
     *
     * <p>The map is used as-is (not copied). {@code getFormat} throws
     * {@link IllegalArgumentException} for any path that is not a key of the map.
     *
     * @param formatMap the path-to-format table to consult
     */
    public VCFInputFormat(Map<Path, VCFFormat> formatMap) {
        this.trustExts = false;
        this.conf = null;
        this.givenMap = true;
        this.formatMap = formatMap;
    }

    /**
     * Returns the format of the file at {@code path}, consulting and updating the format cache.
     *
     * <p>Resolution order: a cached entry (including a cached {@code null}); then, if file
     * names are trusted, {@code VCFFormat.inferFromFilePath}; finally the file is opened and
     * {@code VCFFormat.inferFromData} is applied to its contents. The outcome of the last two
     * steps is cached.
     *
     * @param path the file whose format is wanted
     * @return the format, or {@code null} if it could not be determined (including when the
     *         file could not be read)
     * @throws IllegalArgumentException if this instance was built from a caller-supplied map
     *         that has no entry for {@code path}
     * @throws IllegalStateException if the format is not cached and no configuration is
     *         available yet
     */
    public VCFFormat getFormat(Path path) {
        VCFFormat fmt = this.formatMap.get(path);
        // containsKey distinguishes "cached as unknown" from "never looked up".
        if (fmt != null || this.formatMap.containsKey(path)) {
            return fmt;
        }
        if (this.givenMap) {
            throw new IllegalArgumentException("VCF format for '" + path + "' not in given map");
        }
        if (this.conf == null) {
            throw new IllegalStateException("Don't have a Configuration yet");
        }
        if (this.trustExts) {
            VCFFormat byName = VCFFormat.inferFromFilePath(path);
            if (byName != null) {
                this.formatMap.put(path, byName);
                return byName;
            }
        }
        // try-with-resources: the stream opened for sniffing must not outlive this call.
        try (InputStream in = path.getFileSystem(this.conf).open(path)) {
            fmt = VCFFormat.inferFromData(in);
        }
        catch (IOException e) {
            // Best effort: an unreadable file is reported as "format unknown", but the
            // cause is logged instead of being dropped silently.
            logger.warn("I/O error while inferring the VCF format of {} from its contents", path, e);
        }
        this.formatMap.put(path, fmt);
        return fmt;
    }

    /**
     * Decides whether the given file may be divided into several splits.
     *
     * <p>Files without a compression codec are always splittable. Files mapped to
     * {@link BGZFCodec} or {@link BGZFEnhancedGzipCodec} are splittable only if
     * {@code BlockCompressedInputStream.isValidFile} accepts their contents; an I/O error
     * while checking counts as not splittable. For every other codec the answer is whether
     * the codec is a {@link SplittableCompressionCodec}.
     *
     * @param context  the job context supplying the configuration
     * @param filename the file to examine
     * @return {@code true} if the file can be split
     */
    protected boolean isSplitable(JobContext context, Path filename) {
        Configuration conf = context.getConfiguration();
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(filename);
        if (codec == null) {
            // No codec matched the file name: treated as uncompressed.
            return true;
        }

        boolean bgzfCodec = codec instanceof BGZFCodec || codec instanceof BGZFEnhancedGzipCodec;
        if (!bgzfCodec) {
            if (codec instanceof GzipCodec) {
                logger.warn("Using GzipCodec, which is not splittable, consider using block compressed gzip (BGZF) and BGZFCodec/BGZFEnhancedGzipCodec.");
            }
            return codec instanceof SplittableCompressionCodec;
        }

        // The codec was picked from the file name only; verify the data really is BGZF.
        boolean validBgzf;
        try (FSDataInputStream in = filename.getFileSystem(conf).open(filename)) {
            validBgzf = BlockCompressedInputStream.isValidFile(new BufferedInputStream(in));
        }
        catch (IOException e) {
            validBgzf = false;
        }
        if (!validBgzf) {
            logger.warn("{} is not splittable, consider using block-compressed gzip (BGZF)", filename);
        }
        return validBgzf;
    }

    /**
     * Creates and initializes the record reader matching the format of the split's file.
     *
     * <p>If this instance has no configuration yet, it adopts the one from {@code ctx}; unless
     * the formats come from a caller-supplied map, {@link #TRUST_EXTS_PROPERTY} is read from
     * it at that point (default {@code true}), mirroring the {@link Configuration}-taking
     * constructor.
     *
     * @param split a {@link FileSplit} or {@link FileVirtualSplit}
     * @param ctx   the task attempt context
     * @return an initialized {@link VCFRecordReader} or {@link BCFRecordReader}
     * @throws IllegalArgumentException if the split type is unsupported or the file's format
     *         cannot be determined
     * @throws IllegalStateException if the detected format has no associated reader
     * @throws IOException if initializing the reader fails
     * @throws InterruptedException if initializing the reader is interrupted
     */
    public RecordReader<LongWritable, VariantContextWritable> createRecordReader(InputSplit split, TaskAttemptContext ctx) throws InterruptedException, IOException {
        final Path path;
        if (split instanceof FileSplit) {
            path = ((FileSplit)split).getPath();
        } else if (split instanceof FileVirtualSplit) {
            path = ((FileVirtualSplit)split).getPath();
        } else {
            throw new IllegalArgumentException("split '" + split + "' has unknown type: cannot extract path");
        }
        if (this.conf == null) {
            this.conf = ctx.getConfiguration();
            if (!this.givenMap) {
                // Without this the property would only ever be honoured by the
                // Configuration-taking constructor.
                this.trustExts = this.conf.getBoolean(TRUST_EXTS_PROPERTY, true);
            }
        }
        final VCFFormat fmt = this.getFormat(path);
        if (fmt == null) {
            throw new IllegalArgumentException("unknown VCF format, cannot create RecordReader: " + path);
        }
        final RecordReader<LongWritable, VariantContextWritable> rr;
        switch (fmt) {
            case VCF: {
                rr = new VCFRecordReader();
                break;
            }
            case BCF: {
                rr = new BCFRecordReader();
                break;
            }
            default: {
                // Fail loudly instead of handing a null reader to the framework.
                throw new IllegalStateException("no RecordReader for VCF format " + fmt + ": " + path);
            }
        }
        rr.initialize(split, ctx);
        return rr;
    }

    /**
     * Computes the input splits for the job.
     *
     * <p>Starts from the splits produced by the superclass. Splits of BCF files are re-aligned
     * via {@code fixBCFSplits} and appended after all other splits; the combined list is then
     * passed through {@code filterByInterval}.
     *
     * <p>If this instance has no configuration yet, it adopts the job's; unless the formats
     * come from a caller-supplied map, {@link #TRUST_EXTS_PROPERTY} is read from it at that
     * point (default {@code true}), mirroring the {@link Configuration}-taking constructor.
     *
     * @param job the job context
     * @return the splits to process
     * @throws IOException if listing inputs, aligning BCF splits or reading an index fails
     */
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        if (this.conf == null) {
            this.conf = job.getConfiguration();
            if (!this.givenMap) {
                // Without this the property would only ever be honoured by the
                // Configuration-taking constructor.
                this.trustExts = this.conf.getBoolean(TRUST_EXTS_PROPERTY, true);
            }
        }
        List<InputSplit> origSplits = super.getSplits(job);
        List<FileSplit> bcfOrigSplits = new ArrayList<FileSplit>(origSplits.size());
        List<InputSplit> newSplits = new ArrayList<InputSplit>(origSplits.size());
        for (InputSplit iSplit : origSplits) {
            // The superclass is relied upon to produce FileSplit instances only.
            FileSplit split = (FileSplit)iSplit;
            if (this.getFormat(split.getPath()) == VCFFormat.BCF) {
                bcfOrigSplits.add(split);
            } else {
                newSplits.add(split);
            }
        }
        this.fixBCFSplits(bcfOrigSplits, newSplits);
        return this.filterByInterval(newSplits, this.conf);
    }

    /**
     * Re-aligns the given BCF splits and appends the results to {@code newSplits}.
     *
     * <p>The input list is sorted in place by path so that all splits of one file are
     * adjacent; each run of same-file splits is then handed to {@code addGuessedSplits}.
     *
     * @param splits    the raw BCF splits; reordered by this call
     * @param newSplits receives the aligned splits
     * @throws IOException if a file cannot be read while aligning
     */
    private void fixBCFSplits(List<FileSplit> splits, List<InputSplit> newSplits) throws IOException {
        Comparator<FileSplit> byPath = new Comparator<FileSplit>(){

            @Override
            public int compare(FileSplit left, FileSplit right) {
                Path leftPath = left.getPath();
                return leftPath.compareTo(right.getPath());
            }
        };
        Collections.sort(splits, byPath);
        // addGuessedSplits returns the index of the first split belonging to the next file.
        for (int next = 0; next < splits.size(); ) {
            next = this.addGuessedSplits(splits, next, newSplits);
        }
    }

    /**
     * Aligns every split of one BCF file to record boundaries and appends the results to
     * {@code newSplits}.
     *
     * <p>Processing starts at {@code splits.get(i)} and continues while consecutive entries
     * refer to the same path. For each split the guesser is asked for the first record start
     * inside it. For BGZF files the results are {@link FileVirtualSplit}s whose end is the
     * split's end offset shifted left by 16 bits with the low 16 bits all set; for other
     * files they are {@link FileSplit}s running from the record start to the split's end.
     *
     * <p>When the guesser returns the split's end (no record starts inside the split), the
     * split is merged into the previous one by extending the previous split's end. A first
     * split without a record start is an error.
     *
     * @param splits    path-sorted BCF splits
     * @param i         index of the first split of the file to process
     * @param newSplits receives the aligned splits
     * @return the index of the first split that belongs to a different file, or
     *         {@code splits.size()} if there is none
     * @throws IOException if the file cannot be read or its first split contains no records
     */
    private int addGuessedSplits(List<FileSplit> splits, int i, List<InputSplit> newSplits) throws IOException {
        final Path path = splits.get(i).getPath();
        // try-with-resources: release the stream even if guessing or split creation throws.
        try (WrapSeekable<FSDataInputStream> sin = WrapSeekable.openPath(this.conf, path)) {
            final BCFSplitGuesser guesser = new BCFSplitGuesser(sin);
            final boolean isBGZF = guesser.isBGZF();
            InputSplit prevSplit = null;
            for (; i < splits.size(); ++i) {
                final FileSplit fspl = splits.get(i);
                if (!fspl.getPath().equals(path)) {
                    break;
                }
                final String[] locs = fspl.getLocations();
                final long beg = fspl.getStart();
                final long end = beg + fspl.getLength();
                final long alignBeg = guesser.guessNextBCFRecordStart(beg, end);
                final long alignEnd = isBGZF ? (end << 16) | 0xFFFFL : end;
                if (alignBeg == end) {
                    // No record starts in this split: fold it into its predecessor.
                    if (prevSplit == null) {
                        throw new IOException("'" + path + "': no records in first split: bad BCF file or tiny split size?");
                    }
                    if (isBGZF) {
                        ((FileVirtualSplit)prevSplit).setEndVirtualOffset(alignEnd);
                        continue;
                    }
                    // FileSplit is immutable, so replace the predecessor (always the last
                    // element added) with a copy that keeps its start and extends to this
                    // split's end.
                    final FileSplit prev = (FileSplit)prevSplit;
                    prevSplit = new FileSplit(path, prev.getStart(), alignEnd - prev.getStart(), prev.getLocations());
                    newSplits.remove(newSplits.size() - 1);
                } else if (isBGZF) {
                    prevSplit = new FileVirtualSplit(path, alignBeg, alignEnd, locs);
                } else {
                    prevSplit = new FileSplit(path, alignBeg, alignEnd - alignBeg, locs);
                }
                newSplits.add(prevSplit);
            }
        }
        return i;
    }

    /**
     * Drops or narrows splits that cannot contain records for the configured intervals.
     *
     * <p>If no intervals are configured the input list is returned unchanged. Otherwise a
     * {@code .tbi} index is expected next to every input file; if one is missing a warning is
     * logged and the input list is returned unchanged. Each split is compared only against
     * the index blocks of its own file:
     * <ul>
     *   <li>a {@link FileSplit} is kept as-is if its byte range, shifted left by 16 bits,
     *       overlaps at least one block;</li>
     *   <li>a {@link FileVirtualSplit} is replaced by a new one spanning from the smallest
     *       overlap start to the largest overlap end, or dropped if nothing overlaps.</li>
     * </ul>
     *
     * @param splits the candidate splits
     * @param conf   configuration holding the intervals and used for file-system access
     * @return the filtered splits, or {@code splits} itself when no filtering took place
     * @throws IllegalArgumentException if a split is neither a {@link FileSplit} nor a
     *         {@link FileVirtualSplit}
     * @throws IOException if an index cannot be read
     */
    private List<InputSplit> filterByInterval(List<InputSplit> splits, Configuration conf) throws IOException {
        List<Interval> intervals = VCFInputFormat.getIntervals(conf);
        if (intervals == null) {
            return splits;
        }

        // Distinct input files in encounter order; also validates every split's type.
        LinkedHashSet<Path> vcfFiles = new LinkedHashSet<Path>();
        for (InputSplit split : splits) {
            if (split instanceof FileSplit) {
                vcfFiles.add(((FileSplit)split).getPath());
            } else if (split instanceof FileVirtualSplit) {
                vcfFiles.add(((FileVirtualSplit)split).getPath());
            } else {
                throw new IllegalArgumentException("split '" + split + "' has unknown type: cannot extract path");
            }
        }

        Map<Path, List<Block>> blocksByFile = new HashMap<Path, List<Block>>();
        for (Path vcfFile : vcfFiles) {
            Path indexFile = vcfFile.suffix(".tbi");
            FileSystem fs = vcfFile.getFileSystem(conf);
            if (!fs.exists(indexFile)) {
                logger.warn("No tabix index file found for {}, splits will not be filtered, which may be very inefficient", indexFile);
                return splits;
            }
            blocksByFile.put(vcfFile, VCFInputFormat.loadIndexBlocks(fs, indexFile, intervals));
        }

        List<InputSplit> filteredSplits = new ArrayList<InputSplit>();
        for (InputSplit split : splits) {
            if (split instanceof FileSplit) {
                FileSplit fileSplit = (FileSplit)split;
                // Shift byte offsets into the same 16-bit-shifted space as the block positions.
                long splitStart = fileSplit.getStart() << 16;
                long splitEnd = (fileSplit.getStart() + fileSplit.getLength()) << 16;
                for (Block block : blocksByFile.get(fileSplit.getPath())) {
                    if (VCFInputFormat.overlaps(splitStart, splitEnd, block.getStartPosition(), block.getEndPosition())) {
                        filteredSplits.add(split);
                        break;
                    }
                }
                continue;
            }

            FileVirtualSplit virtualSplit = (FileVirtualSplit)split;
            long splitStart = virtualSplit.getStartVirtualOffset();
            long splitEnd = virtualSplit.getEndVirtualOffset();
            long newStart = Long.MAX_VALUE;
            long newEnd = Long.MIN_VALUE;
            boolean overlaps = false;
            for (Block block : blocksByFile.get(virtualSplit.getPath())) {
                long blockStart = block.getStartPosition();
                long blockEnd = block.getEndPosition();
                if (!VCFInputFormat.overlaps(splitStart, splitEnd, blockStart, blockEnd)) {
                    continue;
                }
                // Track the hull of all overlaps, clipped to the split itself.
                newStart = Math.min(newStart, Math.max(splitStart, blockStart));
                newEnd = Math.max(newEnd, Math.min(splitEnd, blockEnd));
                overlaps = true;
            }
            if (overlaps) {
                filteredSplits.add(new FileVirtualSplit(virtualSplit.getPath(), newStart, newEnd, virtualSplit.getLocations()));
            }
        }
        return filteredSplits;
    }

    /** Reads the tabix index at {@code indexFile} and returns its blocks for all given intervals. */
    private static List<Block> loadIndexBlocks(FileSystem fs, Path indexFile, List<Interval> intervals) throws IOException {
        List<Block> blocks = new ArrayList<Block>();
        // Both streams are declared as resources so the raw stream is closed even if
        // wrapping it fails, and read errors propagate to the caller.
        try (InputStream raw = fs.open(indexFile);
             BlockCompressedInputStream in = new BlockCompressedInputStream(raw)) {
            TabixIndex index = new TabixIndex((InputStream)in);
            for (Interval interval : intervals) {
                blocks.addAll(index.getBlocks(interval.getContig(), interval.getStart(), interval.getEnd()));
            }
        }
        return blocks;
    }

    /**
     * Tests whether the closed range {@code [start, end]} and the closed range
     * {@code [start2, end2]} share at least one point.
     *
     * @return {@code true} if the second range starts inside the first, ends inside the
     *         first, or completely encloses the first
     */
    private static boolean overlaps(long start, long end, long start2, long end2) {
        // Second range begins within the first.
        if (start2 >= start && start2 <= end) {
            return true;
        }
        // Second range finishes within the first.
        if (end2 >= start && end2 <= end) {
            return true;
        }
        // Otherwise they only intersect if the second range swallows the first.
        return start >= start2 && end <= end2;
    }
}

