/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.resources;

import de.julielab.genemapper.composites.CompositeMentionTokenizer;
import de.julielab.genemapper.composites.CompositeToken;
import de.julielab.java.utilities.FileUtilities;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

/**
 * Converts a tab-separated annotation file into a token-per-line IOB file.
 *
 * <p>Only input lines that start with a run of digits followed by a tab are processed. For each
 * such line the fourth column is tokenized with {@link CompositeMentionTokenizer} and the seventh
 * column is read as a label string holding exactly one label character per resulting token.
 */
public class SimConceptCorpusToIOBConverter {
    /** Matches lines that start with a numeric ID followed by a tab; all other lines are skipped. */
    private static final Pattern ANNOTATION_LINE = Pattern.compile("^[0-9]+\t.*");
    /** Index of the column that is compared against the blacklist. */
    private static final int DOC_ID_COLUMN = 0;
    /** Index of the column holding the text that is tokenized. */
    private static final int MENTION_COLUMN = 3;
    /** Index of the column holding one label character per token. */
    private static final int LABEL_COLUMN = 6;

    /**
     * Runs the conversion. Each positional argument, when present, overrides the corresponding
     * built-in default; without arguments the built-in paths are used.
     *
     * @param args optional: {@code [0]} input file, {@code [1]} IOB output file,
     *             {@code [2]} blacklist file
     * @throws IOException if reading or writing any of the files fails
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): the default output is named "simconceptChemical.iob" although the default
        // input is "Disease.txt" - confirm that this pairing is intended.
        File input = new File(args.length > 0 ? args[0] : "/Users/faessler/Downloads/SimConcept/corpus/Disease.txt");
        File output = new File(args.length > 1 ? args[1] : "simconceptChemical.iob");
        File blacklist = new File(args.length > 2 ? args[2] : "jcore-gene-mapper-ae/data/eval_data/bc2_data/test/test.genelist");
        new SimConceptCorpusToIOBConverter().convertPubTatorFormat(input, output, blacklist);
    }

    /**
     * Reads the annotation lines of {@code simconceptInputFile} and writes them in IOB format.
     *
     * <p>For every processed line, one output line {@code token<TAB>[B|I]-label} is written per
     * token, followed by an empty line. A token is tagged {@code I} when its label character
     * equals that of the preceding token of the same mention, and {@code B} otherwise.
     *
     * @param simconceptInputFile the tab-separated input file
     * @param iobOutputFile       the file the IOB output is written to
     * @param blacklistDocIdsFile tab-separated file whose first column lists IDs to skip; may be
     *                            {@code null}, in which case nothing is skipped
     * @throws IOException           if reading or writing fails
     * @throws IllegalStateException if a processed line has too few columns or if its number of
     *                               tokens differs from the number of label characters
     */
    public void convertPubTatorFormat(File simconceptInputFile, File iobOutputFile, File blacklistDocIdsFile) throws IOException {
        Set<String> blacklist = readBlacklist(blacklistDocIdsFile);
        CompositeMentionTokenizer tokenizer = new CompositeMentionTokenizer();
        try (BufferedReader br = FileUtilities.getReaderFromFile(simconceptInputFile);
             BufferedWriter bw = FileUtilities.getWriterToFile(iobOutputFile)) {
            // All lines are parsed before anything is written, so a malformed line is reported
            // before any mention has been written to the output.
            List<Pair<String, String>> mentions = br.lines()
                    .filter(line -> ANNOTATION_LINE.matcher(line).matches())
                    .map(line -> line.split("\t"))
                    .filter(columns -> !blacklist.contains(columns[DOC_ID_COLUMN]))
                    .map(columns -> toMentionAndLabels(columns, tokenizer))
                    .collect(Collectors.toList());
            for (Pair<String, String> mention : mentions) {
                writeMention(bw, mention);
            }
        }
    }

    /** Collects the first tab-separated column of every line of the blacklist file; empty for {@code null}. */
    private static Set<String> readBlacklist(File blacklistDocIdsFile) throws IOException {
        if (blacklistDocIdsFile == null) {
            return Collections.emptySet();
        }
        try (BufferedReader br = FileUtilities.getReaderFromFile(blacklistDocIdsFile)) {
            return br.lines()
                    .map(line -> line.split("\t"))
                    .map(columns -> columns[DOC_ID_COLUMN])
                    .collect(Collectors.toSet());
        }
    }

    /** Pairs the space-joined tokens of the mention column with the label column of one input row. */
    private static Pair<String, String> toMentionAndLabels(String[] columns, CompositeMentionTokenizer tokenizer) {
        // String.split drops trailing empty columns, so a row may be shorter than expected.
        if (columns.length <= LABEL_COLUMN) {
            throw new IllegalStateException("Expected at least " + (LABEL_COLUMN + 1)
                    + " tab-separated columns but found " + columns.length + ": " + String.join("\t", columns));
        }
        String tokenizedMention = tokenizer.tokenize(columns[MENTION_COLUMN])
                .map(CompositeToken::getText)
                .collect(Collectors.joining(" "));
        return new ImmutablePair<>(tokenizedMention, columns[LABEL_COLUMN]);
    }

    /** Writes one {@code token<TAB>IOB-label} line per token of the mention, followed by an empty line. */
    private static void writeMention(BufferedWriter bw, Pair<String, String> mention) throws IOException {
        String[] tokens = mention.getLeft().split("\\s");
        String labels = mention.getRight();
        if (tokens.length != labels.length()) {
            throw new IllegalStateException("The subtoken annotation does not match the created annotation: " + mention);
        }
        char previousLabel = 0;
        for (int i = 0; i < tokens.length; ++i) {
            char label = labels.charAt(i);
            // The first token always begins a segment; comparing against a sentinel character
            // instead would mis-tag a mention whose first label equals the sentinel.
            char iobPrefix = i > 0 && label == previousLabel ? 'I' : 'B';
            bw.write(tokens[i]);
            bw.write("\t");
            bw.write(iobPrefix + "-" + label);
            bw.newLine();
            previousLabel = label;
        }
        bw.newLine();
    }
}

