/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.resources;

import com.google.inject.Guice;
import com.google.inject.Injector;
import de.julielab.geneexpbase.data.DocumentLoader;
import de.julielab.geneexpbase.data.DocumentLoadingException;
import de.julielab.geneexpbase.data.DocumentSourceFileRegistry;
import de.julielab.geneexpbase.data.DocumentSourceFiles;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.ioc.ServicesShutdownHub;
import de.julielab.genemapper.Configuration;
import de.julielab.genemapper.GeneMapper;
import de.julielab.genemapper.classification.TransformerDisambiguationDataUtils;
import de.julielab.genemapper.ioc.GeneMappingModule;
import de.julielab.genemapper.utils.GeneMapperException;
import de.julielab.genemapper.utils.GeneMapperInitializationException;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line utility that writes transformer disambiguation data for a document corpus to TSV
 * files by delegating to {@link TransformerDisambiguationDataUtils#writeData}.
 */
public class TransformerDisambiguationDataWriter {
    private static final Logger log = LoggerFactory.getLogger(TransformerDisambiguationDataWriter.class);

    /**
     * Builds the gene mapper from {@code configurations/genemapper_transformer_data.properties} and
     * writes the disambiguation data for the corpus selected below. No corpus split mapping is
     * used, so all documents end up in a single output file.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws IOException, GeneMapperInitializationException, ExecutionException, GeneMapperException, DocumentLoadingException {
        Configuration configuration = new Configuration(new File("configurations/genemapper_transformer_data.properties"));
        Injector injector = Guice.createInjector(new GeneMappingModule(configuration));
        try {
            GeneMapper geneMapper = injector.getInstance(GeneMapper.class);
            DocumentLoader documentLoader = injector.getInstance(DocumentLoader.class);
            // These labels only end up in the output file name; they do not configure the mapper here.
            String goldTaxMode = "goldTax";
            String matchMode = "onlyApproxMatches";
            String fpMentionMode = "includeFpMentions";
            DocumentSourceFiles documentSourceFiles = DocumentSourceFileRegistry.gnpBc2gnTest();
            File corpusSplitMapping = null;
            File outputFile = new File("transformerDisambiguationData-bc2test-v23-" + goldTaxMode + "-" + matchMode + "-" + fpMentionMode + ".tsv");
            createDisambiguationData(documentSourceFiles, documentLoader, geneMapper, outputFile, corpusSplitMapping);
        } finally {
            // Shut the services down even when data creation fails.
            injector.getInstance(ServicesShutdownHub.class).shutdown();
        }
        log.info("Data creation complete.");
    }

    /**
     * Loads all documents of {@code sourceFiles} and writes their disambiguation data.
     * <p>
     * Without a split mapping, all documents are written to {@code outputFile}. With a split
     * mapping, documents whose ID is labelled {@code dev} in the mapping are written to a sibling
     * file with the suffix {@code -dev.tsv}, and all remaining documents go to {@code outputFile}.
     *
     * @param sourceFiles        the corpus to load
     * @param documentLoader     loader used to read the corpus documents
     * @param mapper             gene mapper handed on to the data writer
     * @param outputFile         target file for the (training) data
     * @param corpusSplitMapping optional UTF-8 file with whitespace-separated lines of the form
     *                           {@code <docId> <split>}; may be {@code null}
     */
    public static void createDisambiguationData(DocumentSourceFiles sourceFiles, DocumentLoader documentLoader, GeneMapper mapper, File outputFile, File corpusSplitMapping) throws IOException, ExecutionException, DocumentLoadingException, GeneMapperException {
        // Materialized as a list because the split case needs to stream the documents twice.
        List<GeneDocument> documents = documentLoader.getDocuments(sourceFiles).collect(Collectors.toList());
        if (corpusSplitMapping == null) {
            TransformerDisambiguationDataUtils.writeData(mapper, outputFile, documents.stream());
            return;
        }

        List<String> dataSplitLines = FileUtils.readLines(corpusSplitMapping, StandardCharsets.UTF_8);
        log.info("Read {} document IDs from {}", dataSplitLines.size(), corpusSplitMapping);
        Set<String> devDocIds = parseDevDocIds(dataSplitLines);
        File devFile = deriveDevFile(outputFile);
        log.info("Got {} dev docs from {} that will be omitted from the training data and written to {}.", devDocIds.size(), corpusSplitMapping, devFile);
        Stream<GeneDocument> trainStream = documents.stream().filter(d -> !devDocIds.contains(d.getId()));
        Stream<GeneDocument> devStream = documents.stream().filter(d -> devDocIds.contains(d.getId()));
        log.info("Writing transformer training data for corpus {} to {}", sourceFiles.getName(), outputFile);
        TransformerDisambiguationDataUtils.writeData(mapper, outputFile, trainStream);
        TransformerDisambiguationDataUtils.writeData(mapper, devFile, devStream);
    }

    /**
     * Collects the document IDs (first column) of all lines whose second column equals {@code dev}.
     * Lines with fewer than two columns, such as blank lines, are skipped.
     */
    private static Set<String> parseDevDocIds(List<String> dataSplitLines) {
        return dataSplitLines.stream()
                // Trim first so that leading whitespace does not produce an empty first column.
                .map(line -> line.trim().split("\\s+"))
                .filter(fields -> fields.length >= 2 && fields[1].equals("dev"))
                .map(fields -> fields[0])
                .collect(Collectors.toSet());
    }

    /**
     * Derives the dev file from the output file by replacing its extension with {@code -dev.tsv}.
     * If the file name has no extension, the suffix is appended to the full path.
     */
    private static File deriveDevFile(File outputFile) {
        String outputPath = outputFile.getAbsolutePath();
        int lastSeparator = outputPath.lastIndexOf(File.separatorChar);
        int extensionStart = outputPath.lastIndexOf('.');
        // Only a dot inside the final path component (and not its first character) marks an
        // extension; a dot in a parent directory name or a missing dot must not truncate the path.
        if (extensionStart <= lastSeparator + 1) {
            extensionStart = outputPath.length();
        }
        return new File(outputPath.substring(0, extensionStart) + "-dev.tsv");
    }
}

