/*
 * Decompiled with CFR 0.152.
 */
package banner.eval.dataset;

import banner.eval.dataset.Dataset;
import banner.tokenization.Tokenizer;
import banner.types.EntityType;
import banner.types.Mention;
import banner.types.Sentence;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.SubnodeConfiguration;

/**
 * Dataset that reads sentences and {@code "DISE"} mention annotations from two
 * tab-separated text files.
 *
 * <p>The file names are taken from the configuration keys {@code sentenceFilename}
 * and {@code mentionTestFilename}, looked up under the sub-configuration named
 * after this class's package. The first line of each file is skipped
 * (presumably a column header row; confirm against the corpus files).
 */
public class AZDCDataset extends Dataset {
    /** Initial number of buckets printed for the "IDs per name" histogram. */
    private static final int MIN_ID_COUNT_BUCKETS = 20;

    /** Initial number of buckets printed for the "names per ID" histogram. */
    private static final int MIN_NAME_COUNT_BUCKETS = 25;

    /** String form of an empty ID collection; used as the key for mentions without IDs. */
    private static final String NO_IDS_KEY = "[]";

    /**
     * Creates a dataset that tokenizes loaded sentences with the given tokenizer.
     *
     * @param tokenizer tokenizer applied to every sentence read by {@link #load}
     */
    public AZDCDataset(Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
    }

    /** Creates a dataset without assigning a tokenizer. */
    public AZDCDataset() {
    }

    /**
     * Loads the mention file, then the sentence file, adds every sentence to this
     * dataset, and prints name/ID mapping statistics to standard output.
     *
     * @param config configuration containing a sub-node named after this class's
     *               package with the keys {@code sentenceFilename} and
     *               {@code mentionTestFilename}
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    @Override
    public void load(HierarchicalConfiguration config) {
        SubnodeConfiguration localConfig = config.configurationAt(this.getClass().getPackage().getName());
        String sentenceFilename = localConfig.getString("sentenceFilename");
        String mentionsFilename = localConfig.getString("mentionTestFilename");
        Map<String, Set<String>> nameToId = new HashMap<String, Set<String>>();
        Map<String, Set<String>> idToName = new HashMap<String, Set<String>>();
        try {
            // Tags must be read first: sentences look up their tags while being built.
            HashMap<String, LinkedList<Dataset.Tag>> tags = this.readAzdcTags(mentionsFilename);
            this.readAzdcSentences(sentenceFilename, tags, nameToId, idToName);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        AZDCDataset.reportIdStatistics(nameToId, idToName);
    }

    /**
     * Opens the mention file, parses it with {@link #getTags}, and always closes it.
     */
    private HashMap<String, LinkedList<Dataset.Tag>> readAzdcTags(String filename) throws IOException {
        BufferedReader mentionFile = new BufferedReader(new FileReader(filename));
        try {
            return this.getTags(mentionFile);
        }
        finally {
            // Close even when parsing fails so the file handle is not leaked.
            mentionFile.close();
        }
    }

    /**
     * Reads the sentence file line by line, builds a {@link Sentence} for each line
     * and adds it to this dataset. The reader is always closed.
     *
     * <p>Each data line is expected to hold at least four tab-separated columns:
     * columns 0-2 are joined with {@code "-"} into the sentence id, column 1 is the
     * document id and column 3 is the sentence text.
     */
    private void readAzdcSentences(String filename, HashMap<String, LinkedList<Dataset.Tag>> tags, Map<String, Set<String>> nameToId, Map<String, Set<String>> idToName) throws IOException {
        BufferedReader sentenceFile = new BufferedReader(new FileReader(filename));
        try {
            // The first line is discarded without being parsed.
            sentenceFile.readLine();
            String line = sentenceFile.readLine();
            while (line != null) {
                String[] columns = line.split("\\t");
                String id = columns[0] + "-" + columns[1] + "-" + columns[2];
                String sentenceText = columns[3];
                Sentence sentence = this.getSentence(id, columns[1], sentenceText, this.tokenizer, tags, nameToId, idToName);
                this.sentences.add(sentence);
                line = sentenceFile.readLine();
            }
        }
        finally {
            // Close even when a line fails to parse so the file handle is not leaked.
            sentenceFile.close();
        }
    }

    /**
     * Prints the diagnostic summary of the name/ID maps: names with more than two
     * ID sets, IDs with more than five names, both histograms, the number of names
     * without IDs and the number of distinct ID keys.
     */
    private static void reportIdStatistics(Map<String, Set<String>> nameToId, Map<String, Set<String>> idToName) {
        int[] idCountForName = new int[MIN_ID_COUNT_BUCKETS];
        for (Map.Entry<String, Set<String>> entry : nameToId.entrySet()) {
            Set<String> ids = entry.getValue();
            idCountForName = AZDCDataset.bumpBucket(idCountForName, ids.size());
            if (ids.size() > 2) {
                System.out.println("Name " + entry.getKey() + " has " + ids.size() + " IDs: " + ids);
            }
        }
        System.out.println(AZDCDataset.formatHistogram("idCountForName", idCountForName));

        int[] nameCountForId = new int[MIN_NAME_COUNT_BUCKETS];
        for (Map.Entry<String, Set<String>> entry : idToName.entrySet()) {
            // Mentions without any ID are reported separately below.
            if (entry.getKey().equals(NO_IDS_KEY)) {
                continue;
            }
            Set<String> names = entry.getValue();
            nameCountForId = AZDCDataset.bumpBucket(nameCountForId, names.size());
            if (names.size() > 5) {
                System.out.println("ID " + entry.getKey() + " has " + names.size() + " names: " + names);
            }
        }
        System.out.println(AZDCDataset.formatHistogram("nameCountForId", nameCountForId));

        // The key is absent when every mention carries at least one ID (or there are no mentions).
        Set<String> namesWithoutIds = idToName.get(NO_IDS_KEY);
        int unannotated = namesWithoutIds == null ? 0 : namesWithoutIds.size();
        System.out.println("Number of names without annotations: " + unannotated);
        System.out.println("Number of ids represented: " + idToName.size());
    }

    /**
     * Increments {@code histogram[bucket]}, growing the array when the bucket lies
     * beyond its current end.
     *
     * @return the array holding the updated counts (a new array if it had to grow)
     */
    private static int[] bumpBucket(int[] histogram, int bucket) {
        int[] counts = histogram;
        if (bucket >= counts.length) {
            counts = new int[bucket + 1];
            System.arraycopy(histogram, 0, counts, 0, histogram.length);
        }
        counts[bucket] = counts[bucket] + 1;
        return counts;
    }

    /** Renders a histogram as {@code label: [c0, c1, ..., ]} (each count followed by ", "). */
    private static String formatHistogram(String label, int[] histogram) {
        StringBuilder text = new StringBuilder(label).append(": [");
        for (int i = 0; i < histogram.length; ++i) {
            text.append(histogram[i]).append(", ");
        }
        return text.append("]").toString();
    }

    /**
     * Parses the mention file into tags grouped by sentence id.
     *
     * <p>The first line is skipped. A line contributes a tag only when it has at
     * least six tab-separated columns, columns 4 and 5 consist solely of digits, and
     * column 5 is strictly greater than column 4. Columns 0-2 joined with {@code "-"}
     * form the sentence id. When a tenth column exists, it is split into IDs that
     * are attached to the tag.
     *
     * <p>Within one sentence a new tag replaces existing tags it contains and is
     * dropped when an existing tag contains it (as decided by {@code Tag.contains}).
     *
     * @param tagFile reader positioned at the start of the mention file; not closed here
     * @return map from sentence id to the tags kept for that sentence
     * @throws IOException if reading from {@code tagFile} fails
     */
    protected HashMap<String, LinkedList<Dataset.Tag>> getTags(BufferedReader tagFile) throws IOException {
        HashMap<String, LinkedList<Dataset.Tag>> tags = new HashMap<String, LinkedList<Dataset.Tag>>();
        // The first line is discarded without being parsed.
        tagFile.readLine();
        String line = tagFile.readLine();
        while (line != null) {
            String[] split = line.split("\\t");
            // "\\d+" only matches non-empty digit strings, so no separate emptiness check is needed.
            if (split.length >= 6 && split[4].matches("\\d+") && split[5].matches("\\d+")) {
                int tagStart = Integer.parseInt(split[4]);
                int tagEnd = Integer.parseInt(split[5]);
                if (tagEnd > tagStart) {
                    String uniqueId = split[0] + "-" + split[1] + "-" + split[2];
                    LinkedList<Dataset.Tag> tagList = tags.get(uniqueId);
                    if (tagList == null) {
                        // An empty list always accepts the tag below, so it can be registered right away.
                        tagList = new LinkedList<Dataset.Tag>();
                        tags.put(uniqueId, tagList);
                    }
                    Dataset.Tag tag = new Dataset.Tag(EntityType.getType("DISE"), tagStart, tagEnd);
                    boolean add = true;
                    Iterator<Dataset.Tag> tagIterator = tagList.iterator();
                    while (tagIterator.hasNext() && add) {
                        Dataset.Tag existing = tagIterator.next();
                        if (tag.contains(existing)) {
                            // The new tag subsumes the existing one: drop the existing tag.
                            tagIterator.remove();
                        } else if (existing.contains(tag)) {
                            // An existing tag already covers the new one: discard the new tag.
                            add = false;
                        }
                    }
                    if (add) {
                        if (split.length >= 10) {
                            for (String id : split[9].split("[\\s-,\\[\\]]+")) {
                                if (id.length() > 0) {
                                    tag.addId(id);
                                }
                            }
                        }
                        tagList.add(tag);
                    }
                }
            }
            line = tagFile.readLine();
        }
        return tags;
    }

    /**
     * Builds a tokenized sentence and attaches one required mention per tag
     * registered for {@code sentenceId}, recording each mention in both maps.
     *
     * @param sentenceId   id used to construct the sentence and to look up its tags
     * @param documentId   id of the document the sentence belongs to
     * @param sentenceText raw sentence text
     * @param tokenizer    tokenizer applied to the new sentence
     * @param tags         tags grouped by sentence id, as produced by {@link #getTags}
     * @param nameToId     updated in place: lower-cased mention text to the string
     *                     forms of the ID collections seen for it
     * @param idToName     updated in place: string form of an ID collection to the
     *                     lower-cased mention texts seen for it
     * @return the tokenized sentence with its mentions added
     */
    protected Sentence getSentence(String sentenceId, String documentId, String sentenceText, Tokenizer tokenizer, HashMap<String, LinkedList<Dataset.Tag>> tags, Map<String, Set<String>> nameToId, Map<String, Set<String>> idToName) {
        Sentence sentence = new Sentence(sentenceId, documentId, sentenceText);
        tokenizer.tokenize(sentence);
        LinkedList<Dataset.Tag> tagList = tags.get(sentenceId);
        if (tagList == null) {
            return sentence;
        }
        for (Dataset.Tag tag : tagList) {
            // NOTE(review): the -1 suggests file offsets are 1-based with an inclusive start; confirm against the corpus.
            int charStart = tag.start - 1;
            int charEnd = tag.end;
            int start = sentence.getTokenIndex(charStart, true);
            assert (start >= 0);
            int end = sentence.getTokenIndex(charEnd, false);
            assert (end > start);
            Mention mention = new Mention(sentence, start, end, tag.type, Mention.MentionType.Required);
            sentence.addMention(mention);
            String name = mention.getText().toLowerCase();
            // The whole ID collection is used as one key via its string form.
            String tagId = tag.getIds().toString();
            AZDCDataset.link(nameToId, name, tagId);
            AZDCDataset.link(idToName, tagId, name);
        }
        return sentence;
    }

    /** Adds {@code value} to the set stored under {@code key}, creating the set when absent. */
    private static void link(Map<String, Set<String>> index, String key, String value) {
        Set<String> values = index.get(key);
        if (values == null) {
            values = new HashSet<String>();
            index.put(key, values);
        }
        values.add(value);
    }

    /**
     * Distributes the sentences over {@code n} new datasets so that all sentences
     * sharing a document id land in the same dataset. The dataset for a document is
     * picked at random the first time that document id is encountered.
     *
     * @param n number of datasets to create; must be positive when this dataset
     *          holds sentences, because {@link Random#nextInt(int)} rejects
     *          non-positive bounds
     * @return the {@code n} datasets, each sharing this dataset's tokenizer
     */
    @Override
    public List<Dataset> split(int n) {
        List<Dataset> splitDatasets = new ArrayList<Dataset>();
        for (int i = 0; i < n; ++i) {
            splitDatasets.add(new AZDCDataset(this.tokenizer));
        }
        // Remembers the split chosen for each document id so later sentences follow it.
        Map<String, Integer> splitForDocument = new HashMap<String, Integer>();
        Random r = new Random();
        for (Sentence sentence : this.sentences) {
            String abstractId = sentence.getDocumentId();
            Integer num = splitForDocument.get(abstractId);
            if (num == null) {
                num = Integer.valueOf(r.nextInt(n));
                splitForDocument.put(abstractId, num);
            }
            splitDatasets.get(num.intValue()).sentences.add(sentence);
        }
        return splitDatasets;
    }
}

