package com.google.allenday.genomics.core.processing;

import com.google.allenday.genomics.core.io.FastqReader;
import com.google.allenday.genomics.core.io.FileUtils;
import com.google.allenday.genomics.core.io.GCSService;
import com.google.allenday.genomics.core.io.IoUtils;
import com.google.allenday.genomics.core.io.TransformIoHandler;
import com.google.allenday.genomics.core.model.FileWrapper;
import com.google.allenday.genomics.core.model.Instrument;
import com.google.allenday.genomics.core.model.SampleMetaData;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.FlatMapElements;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.InferableFunction;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptor;
import org.apache.beam.sdk.values.TypeDescriptors;
import org.javatuples.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/google/allenday/genomics/core/processing/SplitFastqIntoBatches.class */
public class SplitFastqIntoBatches extends PTransform<PCollection<KV<SampleMetaData, List<FileWrapper>>>, PCollection<KV<SampleMetaData, List<FileWrapper>>>> {
    private ReadFastqPartFn readFastqPartFn;
    private BuildFastqContentFn buildFastqContentFn;
    private long maxContentSizeMb;

    /* loaded from: input_file:com/google/allenday/genomics/core/processing/SplitFastqIntoBatches$BuildFastqContentFn.class */
    /**
     * Concatenates the FASTQ chunks of one sample into larger files and emits them as sub-parts.
     *
     * <p>Input: a sample key with an iterable of chunks, where every chunk is an iterable of
     * {@code (file, position)} pairs. Within a chunk the files are ordered by their position and
     * appended to one buffer per position. Whenever appending the next chunk would push any buffer
     * over the size limit derived from {@code maxContentSizeMb}, the buffered content is written out
     * as a new sub-part (key produced by {@code cloneWithNewSubPartIndex}) and the buffers are reset.
     * Whatever is left in the buffers after the last chunk is emitted as the final sub-part.
     *
     * <p>NOTE(review): content is decoded and encoded with the platform default charset
     * ({@code new String(...)} / {@code getBytes()}); FASTQ is presumably plain ASCII - confirm.
     */
    public static class BuildFastqContentFn extends DoFn<KV<SampleMetaData, Iterable<Iterable<KV<FileWrapper, Integer>>>>, KV<SampleMetaData, List<FileWrapper>>> {
        private static final Logger LOG = LoggerFactory.getLogger(BuildFastqContentFn.class);

        /**
         * Upper bound for the flush threshold. A {@link StringBuilder} is backed by an array, so a
         * buffer can never grow beyond roughly {@code Integer.MAX_VALUE} characters; clamping the
         * threshold keeps a very large configured limit from turning into an allocation failure.
         */
        private static final long MAX_BUFFER_CHARS = Integer.MAX_VALUE - 8L;

        private TransformIoHandler buildFastqContentIoHandler;
        private FileUtils fileUtils;
        private IoUtils ioUtils;
        private int maxContentSizeMb;
        GCSService gcsService;

        /**
         * @param transformIoHandler handler used both to read chunk content and to store merged files
         * @param fileUtils          used to convert the megabyte limit into bytes
         * @param ioUtils            passed through to {@code handleInputAsContent}
         * @param i                  maximum size of one merged file, in megabytes
         */
        public BuildFastqContentFn(TransformIoHandler transformIoHandler, FileUtils fileUtils, IoUtils ioUtils, int i) {
            this.buildFastqContentIoHandler = transformIoHandler;
            this.fileUtils = fileUtils;
            this.ioUtils = ioUtils;
            this.maxContentSizeMb = i;
        }

        /**
         * Stores every buffer as a file named {@code <runId>_subpart_<i>__<n>.fastq} (n is 1-based)
         * and returns the stored files keyed by the sample metadata cloned with sub-part index
         * {@code i}. A buffer whose upload fails with an {@link IOException} is logged and left out
         * of the returned list (best effort, as before).
         */
        private KV<SampleMetaData, List<FileWrapper>> generateOutput(SampleMetaData sampleMetaData, TransformIoHandler transformIoHandler, int i, List<StringBuilder> list) {
            SampleMetaData subPartMetaData = sampleMetaData.cloneWithNewSubPartIndex(i);
            List<FileWrapper> builtFiles = new ArrayList<>();
            for (int bufferIndex = 0; bufferIndex < list.size(); bufferIndex++) {
                String fileName = sampleMetaData.getRunId() + String.format("_subpart_%d", i) + String.format("__%d.fastq", bufferIndex + 1);
                try {
                    builtFiles.add(transformIoHandler.handleContentOutput(this.gcsService, list.get(bufferIndex).toString().getBytes(), fileName));
                } catch (IOException e) {
                    // Keep the cause: the message alone is often null or too terse to diagnose.
                    LOG.error(e.getMessage(), e);
                }
            }
            LOG.info(String.format("Built files %s", builtFiles.stream().map(FileWrapper::getFileName).collect(Collectors.joining(", "))));
            return KV.of(subPartMetaData, builtFiles);
        }

        /** Returns true when appending {@code contents} would push any buffer above {@code maxChars}. */
        private static boolean wouldExceedLimit(List<StringBuilder> buffers, List<String> contents, long maxChars) {
            for (int index = 0; index < contents.size(); index++) {
                // long arithmetic: the sum of two int lengths may not fit into an int
                long combinedLength = (long) buffers.get(index).length() + contents.get(index).length();
                if (combinedLength > maxChars) {
                    return true;
                }
            }
            return false;
        }

        /** Returns true when at least one buffer holds content. */
        private static boolean hasBufferedContent(List<StringBuilder> buffers) {
            for (StringBuilder buffer : buffers) {
                if (buffer.length() > 0) {
                    return true;
                }
            }
            return false;
        }

        /** Creates the GCS service used for all reads and writes of this instance. */
        @DoFn.Setup
        public void setUp() {
            this.gcsService = GCSService.initialize(new FileUtils());
        }

        /**
         * Merges the chunks of the current element and outputs one element per built sub-part.
         */
        @DoFn.ProcessElement
        public void processElement(DoFn<KV<SampleMetaData, Iterable<Iterable<KV<FileWrapper, Integer>>>>, KV<SampleMetaData, List<FileWrapper>>>.ProcessContext processContext) {
            KV<SampleMetaData, Iterable<Iterable<KV<FileWrapper, Integer>>>> element = processContext.element();
            long startMillis = System.currentTimeMillis();
            SampleMetaData sampleMetaData = element.getKey();
            LOG.info(String.format("Start building content of %s", sampleMetaData.getRunId()));

            // One buffer per file position: a single buffer, plus a second one for paired samples.
            List<StringBuilder> buffers = new ArrayList<>();
            buffers.add(new StringBuilder());
            if (sampleMetaData.isPaired()) {
                buffers.add(new StringBuilder());
            }

            // Keep the limit as a long: narrowing it to int wrapped to a negative value for limits
            // of 2048 MB and above, which made every single chunk trigger a flush.
            long maxChars = Math.min((long) this.fileUtils.mbToBytes(this.maxContentSizeMb), MAX_BUFFER_CHARS);

            int subPartIndex = 0;
            for (Iterable<KV<FileWrapper, Integer>> chunk : element.getValue()) {
                List<FileWrapper> chunkFiles = StreamSupport.stream(chunk.spliterator(), false)
                        .sorted(Comparator.comparing(KV::getValue))
                        .map(KV::getKey)
                        .collect(Collectors.toList());
                List<String> chunkContents = chunkFiles.stream()
                        .map(fileWrapper -> new String(this.buildFastqContentIoHandler.handleInputAsContent(this.gcsService, fileWrapper, this.ioUtils)))
                        .collect(Collectors.toList());

                if (wouldExceedLimit(buffers, chunkContents, maxChars)) {
                    // Nothing buffered yet means the chunk alone is over the limit; flushing then
                    // would only emit a sub-part made of empty files.
                    if (hasBufferedContent(buffers)) {
                        processContext.output(generateOutput(sampleMetaData, this.buildFastqContentIoHandler, subPartIndex, buffers));
                        subPartIndex++;
                        buffers.forEach(buffer -> buffer.setLength(0));
                    }
                }
                for (int index = 0; index < chunkFiles.size(); index++) {
                    buffers.get(index).append(chunkContents.get(index));
                }
            }
            LOG.info(String.format("Finish building of %s in %d", sampleMetaData.getRunId(), System.currentTimeMillis() - startMillis));
            processContext.output(generateOutput(sampleMetaData, this.buildFastqContentIoHandler, subPartIndex, buffers));
        }
    }

    /* loaded from: input_file:com/google/allenday/genomics/core/processing/SplitFastqIntoBatches$ReadFastqPartFn.class */
    /**
     * Splits a FASTQ blob into parts with a bounded number of reads and emits every part.
     *
     * <p>Input: {@code (sample, file name)} mapped to the {@code (file, position)} pairs grouped
     * under that key; only the first pair of the group is processed. For a file of data type
     * {@code BLOB_URI} the blob is read through {@code FastqReader.readFastqBlobWithReadCountLimit};
     * every part delivered by the reader is stored via the IO handler and output keyed by the sample
     * metadata cloned with the part index, together with the original file position.
     */
    public static class ReadFastqPartFn extends DoFn<KV<KV<SampleMetaData, String>, Iterable<KV<FileWrapper, Integer>>>, KV<SampleMetaData, KV<FileWrapper, Integer>>> {
        private static final Logger LOG = LoggerFactory.getLogger(ReadFastqPartFn.class);
        private FileUtils fileUtils;
        private FastqReader fastqReader;
        private int chunkSizeCount;
        private int maxFastqSizeMB;
        private TransformIoHandler splitFastqIntoBatchesIoHandler;
        GCSService gcsService;

        /**
         * @param fileUtils          used to split file names into base name and extension
         * @param fastqReader        reader that cuts the blob into parts
         * @param transformIoHandler handler used to store every part
         * @param i                  base read-count limit per part, before the instrument multiplier
         * @param i2                 when positive, the handler's memory output limit is set to 0
         */
        public ReadFastqPartFn(FileUtils fileUtils, FastqReader fastqReader, TransformIoHandler transformIoHandler, int i, int i2) {
            this.fileUtils = fileUtils;
            this.fastqReader = fastqReader;
            this.chunkSizeCount = i;
            this.splitFastqIntoBatchesIoHandler = transformIoHandler;
            this.maxFastqSizeMB = i2;
        }

        /** Creates the GCS service used for all reads and writes of this instance. */
        @DoFn.Setup
        public void setUp() {
            this.gcsService = GCSService.initialize(new FileUtils());
        }

        /**
         * Processes the first file of the group. An {@link IllegalArgumentException} (for example
         * an unknown platform name passed to {@code Instrument.valueOf}) is logged and the element
         * is skipped.
         */
        @DoFn.ProcessElement
        public void processElement(DoFn<KV<KV<SampleMetaData, String>, Iterable<KV<FileWrapper, Integer>>>, KV<SampleMetaData, KV<FileWrapper, Integer>>>.ProcessContext processContext) {
            KV<KV<SampleMetaData, String>, Iterable<KV<FileWrapper, Integer>>> element = processContext.element();
            Optional<KV<FileWrapper, Integer>> firstFile = StreamSupport.stream(element.getValue().spliterator(), false).findFirst();
            if (this.maxFastqSizeMB > 0) {
                this.splitFastqIntoBatchesIoHandler.setMemoryOutputLimitMb(0L);
            }
            SampleMetaData sampleMetaData = element.getKey().getKey();
            if (sampleMetaData == null) {
                return;
            }
            try {
                // Derive the per-element limit in a local variable. Dividing the instance field
                // in place compounded the division for every element handled by this instance,
                // so the limit kept shrinking until it collapsed to 1.
                int readsPerPart = Math.max(1, (int) (this.chunkSizeCount / Instrument.valueOf(sampleMetaData.getPlatform()).sizeMultiplier));
                firstFile.ifPresent(indexedFile -> emitParts(processContext, sampleMetaData, indexedFile, readsPerPart));
            } catch (IllegalArgumentException e) {
                LOG.error(e.getMessage(), e);
            }
        }

        /** Reads one blob-backed file part by part and outputs every stored part. */
        private void emitParts(ProcessContext processContext, SampleMetaData sampleMetaData, KV<FileWrapper, Integer> indexedFile, int readsPerPart) {
            FileWrapper fileWrapper = indexedFile.getKey();
            Integer filePosition = indexedFile.getValue();
            Pair<String, String> nameAndExtension = this.fileUtils.splitFilenameAndExtension(fileWrapper.getFileName());
            if (fileWrapper.getDataType() != FileWrapper.DataType.BLOB_URI) {
                return;
            }
            // try-with-resources: the channel was previously never closed. Closing a channel that
            // the reader may already have closed is a no-op.
            try (ReadableByteChannel blobReader = this.gcsService.getBlobReader(this.gcsService.getBlobIdFromUri(fileWrapper.getBlobUri()))) {
                LOG.info(String.format("Working with %s", fileWrapper.getBlobUri()));
                this.fastqReader.readFastqBlobWithReadCountLimit(blobReader, readsPerPart, (content, partIndex) -> {
                    byte[] contentBytes = content.getBytes();
                    LOG.info(String.format("Receive new part of %s with index %d, size %s", fileWrapper.getBlobUri(), partIndex, contentBytes.length));
                    String partFileName = nameAndExtension.getValue0() + "_" + partIndex + nameAndExtension.getValue1();
                    try {
                        SampleMetaData partMetaData = sampleMetaData.cloneWithNewPartIndex(partIndex);
                        FileWrapper storedPart = this.splitFastqIntoBatchesIoHandler.handleContentOutput(this.gcsService, contentBytes, partFileName);
                        processContext.output(KV.of(partMetaData, KV.of(storedPart, filePosition)));
                    } catch (IOException e) {
                        // Best effort as before (the part is skipped), but reported through the logger.
                        LOG.error(String.format("Failed to store part %s", partFileName), e);
                    }
                });
            } catch (IOException e) {
                LOG.error(e.getMessage(), e);
            }
        }
    }

    /**
     * Builds the transform from its two processing steps.
     *
     * @param readFastqPartFn     step that splits every FASTQ file into parts
     * @param buildFastqContentFn step that merges parts back into size-bounded files; it is only
     *                            applied by {@code expand} when {@code j} is positive
     * @param j                   value stored as {@code maxContentSizeMb}
     */
    public SplitFastqIntoBatches(ReadFastqPartFn readFastqPartFn, BuildFastqContentFn buildFastqContentFn, long j) {
        this.maxContentSizeMb = j;
        this.buildFastqContentFn = buildFastqContentFn;
        this.readFastqPartFn = readFastqPartFn;
    }

    /**
     * Wires the splitting pipeline.
     *
     * <p>Every input file is keyed by {@code (sample, file name)} together with its position in the
     * sample's file list, grouped, split into parts by {@code readFastqPartFn} and regrouped by the
     * part-level sample key. When {@code maxContentSizeMb} is positive the parts are re-keyed with
     * part index 0, grouped again and merged by {@code buildFastqContentFn}; otherwise the files of
     * every part are simply ordered by their original position.
     *
     * @param pCollection samples with their FASTQ files
     * @return samples (per part or per merged sub-part) with their FASTQ files
     */
    @Override
    public PCollection<KV<SampleMetaData, List<FileWrapper>>> expand(PCollection<KV<SampleMetaData, List<FileWrapper>>> pCollection) {
        PCollection<KV<SampleMetaData, Iterable<KV<FileWrapper, Integer>>>> partsBySample = pCollection
                .apply("Split pairs files into separate threads", FlatMapElements.via(new InferableFunction<KV<SampleMetaData, List<FileWrapper>>, Iterable<KV<KV<SampleMetaData, String>, KV<FileWrapper, Integer>>>>() {
                    @Override
                    public Iterable<KV<KV<SampleMetaData, String>, KV<FileWrapper, Integer>>> apply(KV<SampleMetaData, List<FileWrapper>> sample) {
                        List<KV<KV<SampleMetaData, String>, KV<FileWrapper, Integer>>> indexedFiles = new ArrayList<>();
                        for (int position = 0; position < sample.getValue().size(); position++) {
                            FileWrapper file = sample.getValue().get(position);
                            // The position travels with the file so the original order can be restored later.
                            indexedFiles.add(KV.of(KV.of(sample.getKey(), file.getFileName()), KV.of(file, position)));
                        }
                        return indexedFiles;
                    }
                }))
                .apply("Group by fastq URI", GroupByKey.create())
                .apply(ParDo.of(this.readFastqPartFn))
                .apply("Group by part index", GroupByKey.create());

        if (this.maxContentSizeMb > 0) {
            return partsBySample
                    .apply(MapElements.via(new SimpleFunction<KV<SampleMetaData, Iterable<KV<FileWrapper, Integer>>>, KV<SampleMetaData, Iterable<KV<FileWrapper, Integer>>>>() {
                        @Override
                        public KV<SampleMetaData, Iterable<KV<FileWrapper, Integer>>> apply(KV<SampleMetaData, Iterable<KV<FileWrapper, Integer>>> part) {
                            // Collapse all parts of a sample onto one key (part index 0).
                            SampleMetaData wholeSampleKey = part.getKey().cloneWithNewPartIndex(0);
                            return KV.of(wholeSampleKey, part.getValue());
                        }
                    }))
                    .apply("Group group chunks by original fastq", GroupByKey.create())
                    .apply(ParDo.of(this.buildFastqContentFn))
                    .apply("Split into separate threads", GroupByKey.create())
                    .apply("Get first element from iterable", MapElements
                            .into(TypeDescriptors.kvs(TypeDescriptor.of(SampleMetaData.class), TypeDescriptors.lists(TypeDescriptor.of(FileWrapper.class))))
                            .via((KV<SampleMetaData, Iterable<List<FileWrapper>>> grouped) ->
                                    KV.of(grouped.getKey(), grouped.getValue().iterator().next())));
        }

        return partsBySample.apply(MapElements.via(new SimpleFunction<KV<SampleMetaData, Iterable<KV<FileWrapper, Integer>>>, KV<SampleMetaData, List<FileWrapper>>>() {
            @Override
            public KV<SampleMetaData, List<FileWrapper>> apply(KV<SampleMetaData, Iterable<KV<FileWrapper, Integer>>> part) {
                List<FileWrapper> orderedFiles = StreamSupport.stream(part.getValue().spliterator(), false)
                        .sorted(Comparator.comparing(KV::getValue))
                        .map(KV::getKey)
                        .collect(Collectors.toList());
                return KV.of(part.getKey(), orderedFiles);
            }
        }));
    }

    /**
     * Lambda deserialization hook: maps a {@link SerializedLambda} back to the lambda it describes.
     *
     * <p>The only lambda recognised is the one whose implementation method is named
     * {@code lambda$expand$f4630d8$1}; after all descriptor checks pass, a lambda taking a
     * {@code KV} and returning it with the first element of its iterable value is returned.
     *
     * <p>NOTE(review): the {@code synthetic} marker and the hash-code switch suggest this is
     * decompiler output of a compiler-generated method rather than hand-written code. The
     * {@code boolean z = -1} / {@code switch (z)} construct is not valid Java source, and javac
     * normally emits this method itself for serializable lambdas, so this copy should presumably be
     * deleted when the source is restored - confirm before recompiling.
     *
     * @param serializedLambda serialized form of the lambda to restore
     * @return the restored lambda
     * @throws IllegalArgumentException when the serialized form matches no known lambda
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        // First stage: translate the implementation method name into a case selector.
        boolean z = -1;
        switch (implMethodName.hashCode()) {
            case 1206095065:
                if (implMethodName.equals("lambda$expand$f4630d8$1")) {
                    z = false;
                    break;
                }
                break;
        }
        // Second stage: verify the full descriptor before handing back the matching lambda.
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/beam/sdk/transforms/SerializableFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("apply") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("com/google/allenday/genomics/core/processing/SplitFastqIntoBatches") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/beam/sdk/values/KV;)Lorg/apache/beam/sdk/values/KV;")) {
                    return kv -> {
                        return KV.of(kv.getKey(), ((Iterable) kv.getValue()).iterator().next());
                    };
                }
                break;
        }
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
