/*
 * Decompiled with CFR 0.152.
 */
package banner.eval;

import banner.eval.dataset.Dataset;
import banner.eval.uima.Util;
import banner.postprocessing.FlattenPostProcessor;
import banner.postprocessing.LocalAbbreviationPostProcessor;
import banner.postprocessing.ParenthesisPostProcessor;
import banner.postprocessing.PostProcessor;
import banner.postprocessing.SequentialPostProcessor;
import banner.tagging.CRFTagger;
import banner.tagging.FeatureSet;
import banner.tagging.TagFormat;
import banner.tagging.Tagger;
import banner.tagging.dictionary.DictionaryTagger;
import banner.tokenization.Tokenizer;
import banner.types.EntityType;
import banner.types.Mention;
import banner.types.Sentence;
import banner.types.Token;
import banner.util.CollectionsRand;
import banner.util.RankedList;
import dragon.nlp.tool.HeppleTagger;
import dragon.nlp.tool.Lemmatiser;
import dragon.nlp.tool.MedPostTagger;
import dragon.nlp.tool.lemmatiser.EngLemmatiser;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.SubnodeConfiguration;
import org.apache.commons.configuration.XMLConfiguration;

public class BANNER {
    /**
     * Command-line entry point: dispatches to the sub-command named by {@code args[0]}.
     *
     * <p>With no arguments a short usage hint is printed. A command name that is not a
     * {@code Function} constant, or a command invoked with too few arguments, produces a
     * diagnostic on standard output instead of an uncaught runtime exception.
     *
     * @param args command name, followed by the configuration file and any command parameters
     * @throws ConfigurationException if the XML configuration file cannot be loaded
     * @throws IOException if a command fails while reading or writing files
     */
    public static void main(String[] args) throws ConfigurationException, IOException {
        if (args.length == 0) {
            System.out.println("Usage: banner.sh <command> <configuration> <parameters>");
            System.out.println("Exceute \"banner.sh help\" for details");
            return;
        }
        Function command;
        try {
            command = Function.valueOf(args[0]);
        } catch (IllegalArgumentException e) {
            // Enum.valueOf throws for a name that is not a constant, so the switch's default
            // branch alone can never report an unknown command; report it here.
            System.out.println("Unrecognized command \"" + args[0] + "\"; use \"help\" for a list of valid commands");
            return;
        }
        switch (command) {
            case help: {
                System.out.println("Commands:");
                System.out.println("help: Prints this help text");
                System.out.println("tag: Uses a trained model to tag sentences from an input file");
                System.out.println("\tUsage: banner.sh tag config.xml sentences.txt");
                System.out.println("\t   Or: banner.sh tag config.xml sentences.txt 0.2");
                System.out.println("test: Evaluates a previously trained model against the test data set up in the config file");
                System.out.println("\tUsage: banner.sh test config.xml");
                System.out.println("\t   Or: banner.sh test config.xml 0.2");
                System.out.println("train: Uses the training data and configuration from the config file to create a new model");
                System.out.println("\tUsage: banner.sh train config.xml");
                System.out.println("\t   Or: banner.sh train config.xml 0.2");
                System.out.println("eval5by2: Performs a 5 by 2 cross-validation on the data set up in the config file");
                System.out.println("\tUsage: banner.sh eval5by2 config.xml");
                System.out.println("\t   Or: banner.sh eval5by2 config.xml 0.2");
                System.out.println("eval10Fold: Performs a 10 fold cross-validation on the data set up in the config file");
                System.out.println("\tUsage: banner.sh eval10Fold config.xml");
                System.out.println("\t   Or: banner.sh eval10Fold config.xml 0.2");
                break;
            }
            case tag: {
                if (BANNER.hasRequiredArguments(args, 3)) {
                    BANNER.tag((HierarchicalConfiguration)new XMLConfiguration(args[1]), args[2], args.length > 3 ? Double.valueOf(args[3]) : null);
                }
                break;
            }
            case test: {
                if (BANNER.hasRequiredArguments(args, 2)) {
                    BANNER.test((HierarchicalConfiguration)new XMLConfiguration(args[1]));
                }
                break;
            }
            case train: {
                if (BANNER.hasRequiredArguments(args, 2)) {
                    BANNER.train((HierarchicalConfiguration)new XMLConfiguration(args[1]), args.length > 2 ? Double.valueOf(args[2]) : null);
                }
                break;
            }
            case eval5by2: {
                if (BANNER.hasRequiredArguments(args, 2)) {
                    BANNER.eval5by2((HierarchicalConfiguration)new XMLConfiguration(args[1]), args.length > 2 ? Double.valueOf(args[2]) : null);
                }
                break;
            }
            case eval10Fold: {
                if (BANNER.hasRequiredArguments(args, 2)) {
                    BANNER.eval10Fold((HierarchicalConfiguration)new XMLConfiguration(args[1]), args.length > 2 ? Double.valueOf(args[2]) : null);
                }
                break;
            }
            case describeDataset: {
                if (BANNER.hasRequiredArguments(args, 2)) {
                    BANNER.describeDataset((HierarchicalConfiguration)new XMLConfiguration(args[1]));
                }
                break;
            }
            case testDict: {
                if (BANNER.hasRequiredArguments(args, 2)) {
                    BANNER.testDict((HierarchicalConfiguration)new XMLConfiguration(args[1]));
                }
                break;
            }
            default: {
                // Reached only for Function constants that have no case above.
                System.out.println("Unrecognized command \"" + args[0] + "\"; use \"help\" for a list of valid commands");
            }
        }
    }

    /**
     * Checks that the command line holds at least {@code required} entries (command name
     * included) and prints a diagnostic when it does not.
     *
     * @param args the full command line; {@code args[0]} is the command name
     * @param required minimum number of entries the command needs
     * @return {@code true} if enough arguments are present
     */
    private static boolean hasRequiredArguments(String[] args, int required) {
        if (args.length >= required) {
            return true;
        }
        System.out.println("Command \"" + args[0] + "\" requires at least " + (required - 1) + " argument(s); use \"help\" for details");
        return false;
    }

    /**
     * Tags the sentences of a tab-separated input file with a previously trained model and
     * prints every mention found to standard output.
     *
     * <p>Each non-blank input line is split on tabs; the first field is used as the sentence id
     * and the second as the sentence text. For each mention one line is printed in the form
     * {@code id<TAB>entityType<TAB>startChar<TAB>endChar<TAB>text}. Lines with fewer than two
     * fields are reported on standard error and skipped.
     *
     * <p>The model is read as a classpath resource whose name is the {@code modelFilename}
     * entry of the configuration section named after this class's package.
     *
     * @param config configuration supplying the tokenizer, dictionary, lemmatiser, POS tagger,
     *        post-processor and model name
     * @param sentenceFilename path of the input file
     * @param percentage currently unused by this method
     * @throws IOException if the model resource cannot be found or a file cannot be read
     */
    private static void tag(HierarchicalConfiguration config, String sentenceFilename, Double percentage) throws IOException {
        long start = System.currentTimeMillis();
        Tokenizer tokenizer = BANNER.getTokenizer(config);
        DictionaryTagger dictionary = BANNER.getDictionary(config);
        EngLemmatiser lemmatiser = BANNER.getLemmatiser(config);
        dragon.nlp.tool.Tagger posTagger = BANNER.getPosTagger(config);
        PostProcessor postProcessor = BANNER.getPostProcessor(config);
        SubnodeConfiguration localConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String modelFilename = localConfig.getString("modelFilename");
        System.out.println("Model: " + modelFilename);
        // Resolve against BANNER's own class: BANNER.class.getClass() is java.lang.Class, which
        // would resolve relative names against java/lang and bypass BANNER's class loader.
        java.io.InputStream modelStream = BANNER.class.getResourceAsStream(modelFilename);
        if (modelStream == null) {
            throw new IOException("Model resource not found on the classpath: " + modelFilename);
        }
        CRFTagger tagger;
        try {
            tagger = CRFTagger.load(modelStream, (Lemmatiser)lemmatiser, posTagger, dictionary);
        } finally {
            // NOTE(review): assumes CRFTagger.load consumes the stream before returning - confirm.
            modelStream.close();
        }
        System.out.println("Completed input: " + (System.currentTimeMillis() - start) + "ms");
        BufferedReader reader = new BufferedReader(new FileReader(sentenceFilename));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                String[] split = line.split("\\t");
                if (split.length < 2) {
                    // Without a second field there is no sentence text to tag.
                    System.err.println("Skipping malformed line (expected \"id<TAB>text\"): " + line);
                    continue;
                }
                Sentence sentence = new Sentence(split[0], "", split[1]);
                sentence = BANNER.process(tagger, tokenizer, postProcessor, sentence);
                for (Mention mention : sentence.getMentions()) {
                    StringBuilder output = new StringBuilder();
                    output.append(split[0]);
                    output.append("\t");
                    output.append(mention.getEntityType());
                    output.append("\t");
                    output.append(mention.getStartChar());
                    output.append("\t");
                    output.append(mention.getEndChar());
                    output.append("\t");
                    output.append(mention.getText());
                    System.out.println(output.toString());
                }
            }
        } finally {
            // Close the reader even when tagging a sentence throws.
            reader.close();
        }
    }

    /**
     * Trains a CRF tagger on the configured dataset and writes the resulting model to the file
     * named by the {@code modelFilename} entry of this package's configuration section.
     *
     * @param config configuration supplying the dataset, tag format, dictionary, CRF order,
     *        lemmatiser, POS tagger, mention types, overlap options and file names
     * @param percentage if non-null, passed to {@code CollectionsRand.randomSubset} to train on
     *        a random subset of the sentences; if null, all sentences are used
     * @throws ConfigurationException declared for configuration failures
     * @throws IOException if logging the input or writing the model fails
     */
    public static void train(HierarchicalConfiguration config, Double percentage) throws ConfigurationException, IOException {
        final long startedAt = System.currentTimeMillis();

        // Gather everything the training run needs from the configuration.
        Dataset trainingData = BANNER.getDataset(config);
        TagFormat format = BANNER.getTagFormat(config);
        DictionaryTagger dictionaryTagger = BANNER.getDictionary(config);
        int order = BANNER.getCRFOrder(config);
        System.out.println("tagformat=" + format);
        System.out.println("crfOrder=" + order);
        EngLemmatiser engLemmatiser = BANNER.getLemmatiser(config);
        dragon.nlp.tool.Tagger partOfSpeechTagger = BANNER.getPosTagger(config);
        Set<Mention.MentionType> types = BANNER.getMentionTypes(config);
        Sentence.OverlapOption sameTypeOverlap = BANNER.getSameTypeOverlapOption(config);
        Sentence.OverlapOption differentTypeOverlap = BANNER.getDifferentTypeOverlapOption(config);
        String simFind = BANNER.getSimFindFilename(config);
        String packageName = BANNER.class.getPackage().getName();
        SubnodeConfiguration packageConfig = config.configurationAt(packageName);
        String modelPath = packageConfig.getString("modelFilename");

        // Optionally restrict training to a random subset of the sentences.
        Set<Sentence> trainingSentences = trainingData.getSentences();
        if (percentage != null) {
            trainingSentences = CollectionsRand.randomSubset(trainingSentences, percentage);
        }
        BANNER.logInput(trainingSentences, config);
        long inputMillis = System.currentTimeMillis() - startedAt;
        System.out.println("Completed input: " + inputMillis + "ms");

        System.out.println("Training data loaded, starting training");
        FeatureSet features = new FeatureSet(format, (Lemmatiser)engLemmatiser, partOfSpeechTagger, dictionaryTagger, simFind, types, sameTypeOverlap, differentTypeOverlap);
        CRFTagger trained = CRFTagger.train(trainingSentences, order, format, features);

        System.out.println("Training complete, saving model");
        File modelFile = new File(modelPath);
        trained.write(modelFile);
    }

    /**
     * Loads a previously trained model from the configured {@code modelFilename} and evaluates
     * it against the configured dataset, printing the resulting performance and elapsed time.
     *
     * @param config configuration supplying the dataset, dictionary, lemmatiser, POS tagger and
     *        file names
     * @throws ConfigurationException declared for configuration failures
     * @throws IOException if the model file cannot be read or output files cannot be written
     */
    public static void test(HierarchicalConfiguration config) throws ConfigurationException, IOException {
        long start = System.currentTimeMillis();
        Dataset dataset = BANNER.getDataset(config);
        DictionaryTagger dictionary = BANNER.getDictionary(config);
        EngLemmatiser lemmatiser = BANNER.getLemmatiser(config);
        dragon.nlp.tool.Tagger posTagger = BANNER.getPosTagger(config);
        SubnodeConfiguration localConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String modelFilename = localConfig.getString("modelFilename");
        System.out.println("Model: " + modelFilename);
        BANNER.logInput(dataset.getSentences(), config);
        System.out.println("Completed input: " + (System.currentTimeMillis() - start));
        CRFTagger tagger;
        FileInputStream modelStream = new FileInputStream(new File(modelFilename));
        try {
            tagger = CRFTagger.load(modelStream, (Lemmatiser)lemmatiser, posTagger, dictionary);
        } finally {
            // Release the file handle whether or not loading succeeded.
            // NOTE(review): assumes CRFTagger.load consumes the stream before returning - confirm.
            modelStream.close();
        }
        Performance performance = BANNER.test(dataset, tagger, config);
        performance.print();
        System.out.println("Elapsed time: " + (System.currentTimeMillis() - start) + "ms");
    }

    /**
     * Runs a 5x2 cross-validation: five times the dataset is split in two, and for each split a
     * model is trained on one half and tested on the other, then the roles are swapped.
     *
     * <p>Each trained model and each set of log/output files is written under the configured
     * file name with the run and cross indices appended as a suffix.
     *
     * @param config configuration supplying the dataset, training parameters and file names
     * @param percentage if non-null, passed to {@code CollectionsRand.randomSubset} to reduce
     *        each training half to a random subset
     * @throws ConfigurationException declared for configuration failures
     * @throws IOException if any log, model or output file cannot be written
     */
    public static void eval5by2(HierarchicalConfiguration config, Double percentage) throws ConfigurationException, IOException {
        long timer = System.currentTimeMillis();
        Dataset fullDataset = BANNER.getDataset(config);

        // Report how many mentions of each entity type the dataset holds.
        Map<EntityType, Integer> countsByType = fullDataset.getTypeCounts();
        for (Map.Entry<EntityType, Integer> entry : countsByType.entrySet()) {
            System.out.println(entry.getKey().toString() + ", count=" + entry.getValue());
        }

        TagFormat format = BANNER.getTagFormat(config);
        DictionaryTagger dictionaryTagger = BANNER.getDictionary(config);
        int order = BANNER.getCRFOrder(config);
        EngLemmatiser engLemmatiser = BANNER.getLemmatiser(config);
        dragon.nlp.tool.Tagger partOfSpeechTagger = BANNER.getPosTagger(config);
        Set<Mention.MentionType> types = BANNER.getMentionTypes(config);
        Sentence.OverlapOption sameTypeOverlap = BANNER.getSameTypeOverlapOption(config);
        Sentence.OverlapOption differentTypeOverlap = BANNER.getDifferentTypeOverlapOption(config);
        String simFind = BANNER.getSimFindFilename(config);
        SubnodeConfiguration packageConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String modelPath = packageConfig.getString("modelFilename");

        int run = 0;
        while (run < 5) {
            timer = System.currentTimeMillis();
            List<Dataset> halves = fullDataset.split(2);
            System.out.println("Created folds for run " + run + ": " + (System.currentTimeMillis() - timer));
            int cross = 0;
            while (cross < 2) {
                // cross is 0 or 1, so 1 - cross selects the other half.
                Dataset trainingHalf = halves.get(cross);
                Dataset testingHalf = halves.get(1 - cross);
                Set<Sentence> trainingSentences = trainingHalf.getSentences();
                if (percentage != null) {
                    trainingSentences = CollectionsRand.randomSubset(trainingSentences, percentage);
                }
                String suffix = String.valueOf(run) + cross;
                BANNER.logInput(trainingSentences, config, suffix);

                timer = System.currentTimeMillis();
                System.out.println("\tTraining data loaded, starting training");
                FeatureSet features = new FeatureSet(format, (Lemmatiser)engLemmatiser, partOfSpeechTagger, dictionaryTagger, simFind, types, sameTypeOverlap, differentTypeOverlap);
                CRFTagger trained = CRFTagger.train(trainingSentences, order, format, features);
                System.out.println("Completed training for run " + run + " cross " + cross + ": " + (System.currentTimeMillis() - timer));
                trained.write(new File(BANNER.getFilename(modelPath, suffix)));
                System.gc();

                timer = System.currentTimeMillis();
                Performance result = BANNER.test(testingHalf, trained, config, suffix);
                result.print();
                System.out.println("Completed testing for run " + run + " cross " + cross + ": " + (System.currentTimeMillis() - timer) + "ms");

                // Drop the model reference before requesting a collection.
                trained = null;
                System.gc();
                ++cross;
            }
            ++run;
        }
    }

    /**
     * Runs a 10-fold cross-validation: the dataset is split into ten parts and, for each part in
     * turn, a model is trained on the other nine and tested on that part.
     *
     * <p>Each trained model and each set of log/output files is written under the configured
     * file name with the fold index appended as a suffix.
     *
     * @param config configuration supplying the dataset, training parameters and file names
     * @param percentage if non-null, passed to {@code CollectionsRand.randomSubset} to reduce
     *        each training set to a random subset
     * @throws ConfigurationException declared for configuration failures
     * @throws IOException if any log, model or output file cannot be written
     */
    public static void eval10Fold(HierarchicalConfiguration config, Double percentage) throws ConfigurationException, IOException {
        long timer = System.currentTimeMillis();
        Dataset fullDataset = BANNER.getDataset(config);

        // Report how many mentions of each entity type the dataset holds.
        Map<EntityType, Integer> countsByType = fullDataset.getTypeCounts();
        for (Map.Entry<EntityType, Integer> entry : countsByType.entrySet()) {
            System.out.println(entry.getKey().toString() + ", count=" + entry.getValue());
        }

        TagFormat format = BANNER.getTagFormat(config);
        DictionaryTagger dictionaryTagger = BANNER.getDictionary(config);
        int order = BANNER.getCRFOrder(config);
        EngLemmatiser engLemmatiser = BANNER.getLemmatiser(config);
        dragon.nlp.tool.Tagger partOfSpeechTagger = BANNER.getPosTagger(config);
        Set<Mention.MentionType> types = BANNER.getMentionTypes(config);
        Sentence.OverlapOption sameTypeOverlap = BANNER.getSameTypeOverlapOption(config);
        Sentence.OverlapOption differentTypeOverlap = BANNER.getDifferentTypeOverlapOption(config);
        String simFind = BANNER.getSimFindFilename(config);
        SubnodeConfiguration packageConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String modelPath = packageConfig.getString("modelFilename");

        timer = System.currentTimeMillis();
        List<Dataset> folds = fullDataset.split(10);
        System.out.println("Created folds: " + (System.currentTimeMillis() - timer));

        int cross = 0;
        while (cross < 10) {
            String foldSuffix = Integer.toString(cross);

            // Training data is every fold except the held-out one; each fold's size is printed.
            ArrayList<Dataset> trainingFolds = new ArrayList<Dataset>();
            int index = 0;
            while (index < 10) {
                Dataset fold = folds.get(index);
                if (index != cross) {
                    trainingFolds.add(fold);
                }
                System.out.println(folds.get(index).getSentences().size());
                ++index;
            }
            DatasetCombiner trainingData = new DatasetCombiner(trainingFolds);
            Dataset heldOut = folds.get(cross);
            Set<Sentence> trainingSentences = trainingData.getSentences();
            if (percentage != null) {
                trainingSentences = CollectionsRand.randomSubset(trainingSentences, percentage);
            }
            BANNER.logInput(trainingSentences, config, foldSuffix);

            timer = System.currentTimeMillis();
            System.out.println("\tTraining data loaded, starting training");
            FeatureSet features = new FeatureSet(format, (Lemmatiser)engLemmatiser, partOfSpeechTagger, dictionaryTagger, simFind, types, sameTypeOverlap, differentTypeOverlap);
            CRFTagger trained = CRFTagger.train(trainingSentences, order, format, features);
            System.out.println("Completed training for cross " + cross + ": " + (System.currentTimeMillis() - timer));
            trained.write(new File(BANNER.getFilename(modelPath, foldSuffix)));
            System.gc();

            timer = System.currentTimeMillis();
            Performance result = BANNER.test(heldOut, trained, config, foldSuffix);
            result.print();
            System.out.println("Completed testing for cross " + cross + ": " + (System.currentTimeMillis() - timer));

            // Drop the model reference before requesting a collection.
            trained = null;
            System.gc();
            ++cross;
        }
    }

    /**
     * Prints descriptive statistics for the configured dataset: sentence, token and mention
     * totals, histograms of sentence length, mentions per sentence and mention length, the
     * number of distinct mention texts, and histograms of how often tokens occur inside and
     * outside mentions.
     *
     * <p>Histogram buckets are fixed-size arrays; values at or beyond an array's length are
     * not counted in that histogram.
     *
     * @param config configuration supplying the dataset and log file names
     * @throws ConfigurationException declared for configuration failures
     * @throws IOException if the input log files cannot be written
     */
    private static void describeDataset(HierarchicalConfiguration config) throws ConfigurationException, IOException {
        Dataset dataset = BANNER.getDataset(config);
        Set<Sentence> sentences = dataset.getSentences();
        BANNER.logInput(sentences, config);
        int tokenCount = 0;
        int mentionCount = 0;
        // Primitive histograms; Arrays.toString(int[]) prints them as "[a, b, ...]".
        int[] sentenceLength = new int[100];
        int[] mentionFrequency = new int[15];
        int[] mentionLength = new int[50];
        HashSet<String> mentionTexts = new HashSet<String>();
        DictionaryTagger d = new DictionaryTagger();
        HashMap<String, Count> tokenFrequencies = new HashMap<String, Count>();
        HashMap<String, Count> tokenFrequenciesInMention = new HashMap<String, Count>();
        for (Sentence sentence : sentences) {
            int numTokens = sentence.getTokens().size();
            if (numTokens < sentenceLength.length) {
                sentenceLength[numTokens]++;
            }
            tokenCount += numTokens;
            for (Token token : sentence.getTokens()) {
                Count count = tokenFrequencies.get(token.getText());
                if (count == null) {
                    count = new Count();
                    tokenFrequencies.put(token.getText(), count);
                }
                count.incr();
            }
            List<Mention> mentions = sentence.getMentions();
            int numMentions = mentions.size();
            mentionCount += numMentions;
            if (numMentions < mentionFrequency.length) {
                mentionFrequency[numMentions]++;
            }
            for (Mention mention : mentions) {
                int length = mention.length();
                if (length < mentionLength.length) {
                    // Store the increment; previously the incremented value was computed and
                    // discarded, so this histogram always printed as all zeros.
                    mentionLength[length]++;
                }
                mentionTexts.add(mention.getText());
                ArrayList<String> tokens = new ArrayList<String>();
                for (Token token : mention.getTokens()) {
                    tokens.add(token.getText());
                }
                d.add(tokens, Collections.singleton(mention.getEntityType()));
                for (Token token : mention.getTokens()) {
                    Count count = tokenFrequenciesInMention.get(token.getText());
                    if (count == null) {
                        count = new Count();
                        tokenFrequenciesInMention.put(token.getText(), count);
                    }
                    count.incr();
                }
            }
        }
        System.out.println();
        System.out.println("Number of sentences: " + sentences.size());
        System.out.println("Number of tokens: " + tokenCount);
        System.out.println("Number of mentions: " + mentionCount);
        System.out.println("Number of sentences per sentence length: " + Arrays.toString(sentenceLength));
        System.out.println("Number of sentences per mention frequency: " + Arrays.toString(mentionFrequency));
        System.out.println("Number of mentions per mention length: " + Arrays.toString(mentionLength));
        System.out.println("Number of unique mention texts: " + mentionTexts.size());
        // NOTE(review): the tagged copies are not used afterwards; kept to preserve behavior.
        for (Sentence sentence : sentences) {
            Sentence sentence2 = sentence.copy(true, false);
            d.tag(sentence2);
        }
        System.out.println("Token frequencies:");
        // NOTE(review): this ranked list is filled but never printed in the visible code.
        RankedList<String> tokenFrequenciesList = new RankedList<String>(100);
        int[] tokenFreqenciesNonMention = new int[100];
        int[] tokenFreqenciesMention = new int[100];
        for (Map.Entry<String, Count> entry : tokenFrequencies.entrySet()) {
            String token = entry.getKey();
            int count = entry.getValue().getCount();
            Count inMention = tokenFrequenciesInMention.get(token);
            int countInMention = inMention == null ? 0 : inMention.getCount();
            int countNonMention = Math.max(0, count - countInMention);
            if (countNonMention < tokenFreqenciesNonMention.length) {
                tokenFreqenciesNonMention[countNonMention]++;
            }
            if (countInMention < tokenFreqenciesMention.length) {
                tokenFreqenciesMention[countInMention]++;
            }
            if (count > 5) {
                tokenFrequenciesList.add(1.0 - (double)countInMention / (double)count, token);
            }
        }
        System.out.println("Number of tokens which appear in mentions with a specific frequency:" + Arrays.toString(tokenFreqenciesMention));
        System.out.println("Number of tokens which appear non mention with a specific frequency:" + Arrays.toString(tokenFreqenciesNonMention));
    }

    /**
     * Evaluates the configured dictionary tagger on its own against the configured dataset and
     * prints the resulting performance along with timing information.
     *
     * @param config configuration supplying the dataset, dictionary and file names
     * @throws ConfigurationException declared for configuration failures
     * @throws IOException if log or output files cannot be written
     */
    public static void testDict(HierarchicalConfiguration config) throws ConfigurationException, IOException {
        long timer = System.currentTimeMillis();
        Dataset evaluationData = BANNER.getDataset(config);
        DictionaryTagger dictionaryTagger = BANNER.getDictionary(config);
        Set<Sentence> evaluationSentences = evaluationData.getSentences();
        BANNER.logInput(evaluationSentences, config);
        long inputMillis = System.currentTimeMillis() - timer;
        System.out.println("Completed input: " + inputMillis + "ms");

        // Restart the clock so only tagging time is reported below.
        timer = System.currentTimeMillis();
        Performance result = BANNER.test(evaluationData, dictionaryTagger, config);
        result.print();
        long taggingMillis = System.currentTimeMillis() - timer;
        System.out.println("Completed tagging: " + taggingMillis);
    }

    /**
     * Writes the id, raw-text and training-text log files for the given sentences using the
     * configured file names unchanged (no filename suffix).
     *
     * @param sentences sentences to log
     * @param config configuration supplying the tag format and log file names
     * @throws IOException if a log file cannot be opened
     */
    public static void logInput(Set<Sentence> sentences, HierarchicalConfiguration config) throws IOException {
        final String noSuffix = null;
        BANNER.logInput(sentences, config, noSuffix);
    }

    /**
     * Writes three log files for the given sentences, one line per sentence each: the sentence
     * ids, the raw sentence texts, and the token/label training text.
     *
     * <p>File names come from the {@code idInputFilename}, {@code rawInputFilename} and
     * {@code trainingInputFilename} entries of this package's configuration section, with
     * {@code filenameSuffix} inserted by {@code getFilename}.
     *
     * @param sentences sentences to log
     * @param config configuration supplying the tag format and log file names
     * @param filenameSuffix suffix to add to each file name, or {@code null} for none
     * @throws IOException if a log file cannot be opened
     */
    private static void logInput(Set<Sentence> sentences, HierarchicalConfiguration config, String filenameSuffix) throws IOException {
        TagFormat tagFormat = BANNER.getTagFormat(config);
        SubnodeConfiguration localConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String idInputFilename = BANNER.getFilename(localConfig.getString("idInputFilename"), filenameSuffix);
        String rawInputFilename = BANNER.getFilename(localConfig.getString("rawInputFilename"), filenameSuffix);
        String trainingInputFilename = BANNER.getFilename(localConfig.getString("trainingInputFilename"), filenameSuffix);
        PrintWriter idFile = null;
        PrintWriter rawFile = null;
        PrintWriter trainingFile = null;
        try {
            // Opened inside the try so a failure on a later file still closes the earlier ones.
            idFile = new PrintWriter(new BufferedWriter(new FileWriter(idInputFilename)));
            rawFile = new PrintWriter(new BufferedWriter(new FileWriter(rawInputFilename)));
            trainingFile = new PrintWriter(new BufferedWriter(new FileWriter(trainingInputFilename)));
            for (Sentence sentence : sentences) {
                idFile.println(sentence.getSentenceId());
                rawFile.println(sentence.getText());
                trainingFile.println(BANNER.getTrainingText(sentence, tagFormat, EnumSet.of(Mention.MentionType.Required), Sentence.OverlapOption.Raw, Sentence.OverlapOption.Raw));
            }
        } finally {
            if (idFile != null) {
                idFile.close();
            }
            if (rawFile != null) {
                rawFile.close();
            }
            if (trainingFile != null) {
                trainingFile.close();
            }
        }
    }

    /**
     * Renders a sentence as a single line of {@code token|label} pairs separated by single
     * spaces, with leading and trailing whitespace trimmed from the result.
     *
     * @param sentence sentence whose tokens and labels are rendered
     * @param format tag format passed to {@code Sentence.getTokenLabels}
     * @param mentionTypes mention types passed to {@code Sentence.getTokenLabels}
     * @param sameType overlap option for same-type mentions, passed through
     * @param differentType overlap option for different-type mentions, passed through
     * @return the rendered line; empty if the sentence has no tokens
     */
    public static String getTrainingText(Sentence sentence, TagFormat format, Set<Mention.MentionType> mentionTypes, Sentence.OverlapOption sameType, Sentence.OverlapOption differentType) {
        List<String> tokenLabels = sentence.getTokenLabels(format, mentionTypes, sameType, differentType);
        List<Token> sentenceTokens = sentence.getTokens();
        StringBuilder line = new StringBuilder();
        int position = 0;
        while (position < sentenceTokens.size()) {
            String tokenText = sentenceTokens.get(position).getText();
            // Every pair is followed by a space; the final trim removes the trailing one.
            line.append(tokenText).append("|").append(tokenLabels.get(position)).append(" ");
            ++position;
        }
        String rendered = line.toString();
        return rendered.trim();
    }

    /**
     * Inserts a suffix into a file name just before its extension, e.g. {@code model.bin} with
     * suffix {@code 01} becomes {@code model01.bin}.
     *
     * <p>The extension starts at the last period of the final path component. A period in a
     * directory part (as in {@code ./output/model} or {@code out.dir/model}) is not treated as
     * an extension separator; such names get the suffix appended at the end.
     *
     * @param originalFilename file name or path, may be {@code null}
     * @param filenameSuffix suffix to insert, may be {@code null}
     * @return {@code null} if {@code originalFilename} is {@code null}; the unchanged name if
     *         {@code filenameSuffix} is {@code null}; otherwise the name with the suffix inserted
     */
    private static String getFilename(String originalFilename, String filenameSuffix) {
        if (originalFilename == null) {
            return null;
        }
        if (filenameSuffix == null) {
            return originalFilename;
        }
        // '/' is accepted everywhere; File.separatorChar adds '\\' on platforms that use it.
        int separator = Math.max(originalFilename.lastIndexOf('/'), originalFilename.lastIndexOf(File.separatorChar));
        int period = originalFilename.lastIndexOf('.');
        if (period <= separator) {
            // No period in the final path component: no extension to preserve.
            return originalFilename + filenameSuffix;
        }
        String name = originalFilename.substring(0, period);
        String extension = originalFilename.substring(period);
        return name + filenameSuffix + extension;
    }

    /**
     * Writes one line per mention of the sentence to the given writer, in the form
     * {@code sentenceId|start end|mentionText}.
     *
     * <p>When {@code onlyNonBlank} is false, mentions of type {@code Found} are written using
     * {@code Mention.getStartChar}/{@code getEndChar} with the {@code ignoreWhitespace} flag.
     * When it is true, the {@code Required} mention covering each token is looked up and the
     * offsets are derived from a running sum of {@code Token.length()} values;
     * {@code ignoreWhitespace} is not consulted on that path.
     *
     * @param sentence sentence whose mentions are written
     * @param mentionOutputFile destination writer
     * @param onlyNonBlank selects the token-length based offset computation
     * @param ignoreWhitespace passed to the mention offset getters when {@code onlyNonBlank} is
     *        false
     */
    public static void outputMentions(Sentence sentence, PrintWriter mentionOutputFile, boolean onlyNonBlank, boolean ignoreWhitespace) {
        if (!onlyNonBlank) {
            for (Mention found : sentence.getMentions(Mention.MentionType.Found)) {
                mentionOutputFile.print(sentence.getSentenceId() + "|");
                mentionOutputFile.print(found.getStartChar(ignoreWhitespace) + " ");
                mentionOutputFile.print(found.getEndChar(ignoreWhitespace) + "|");
                mentionOutputFile.println(found.getText());
            }
            return;
        }

        List<Token> sentenceTokens = sentence.getTokens();
        int consumedChars = 0;
        int index = 0;
        while (index < sentenceTokens.size()) {
            Token current = sentenceTokens.get(index);
            List<Mention> covering = sentence.getMentions(current, EnumSet.of(Mention.MentionType.Required));
            assert (covering.size() <= 1);
            Mention active = covering.size() > 0 ? covering.get(0) : null;

            // Mention begins at this token: emit the id and the start offset.
            if (active != null && index == active.getStart()) {
                mentionOutputFile.print(sentence.getSentenceId() + "|");
                mentionOutputFile.print(consumedChars + " ");
            }
            consumedChars += sentenceTokens.get(index).length();

            // Mention ends at this token: emit the inclusive end offset and the text.
            if (active != null && index == active.getEnd() - 1) {
                mentionOutputFile.print((consumedChars - 1) + "|");
                mentionOutputFile.println(active.getText());
            }
            ++index;
        }
    }

    /**
     * Tags and scores the dataset with the given tagger, writing output files under the
     * configured file names unchanged (no filename suffix).
     *
     * @param dataset dataset to evaluate against
     * @param tagger tagger under evaluation
     * @param config configuration supplying the tag format, tokenizer, post-processor and file
     *        names
     * @return the accumulated performance
     * @throws IOException if an output file cannot be opened
     */
    public static Performance test(Dataset dataset, Tagger tagger, HierarchicalConfiguration config) throws IOException {
        final String noSuffix = null;
        Performance result = BANNER.test(dataset, tagger, config, noSuffix);
        return result;
    }

    /**
     * Tags every sentence of the dataset with the given tagger, writes the results to the
     * configured output files and accumulates a {@code Performance} created with
     * {@code MatchCriteria.Strict}.
     *
     * <p>File names come from the {@code outputFilename}, {@code mentionFilename} and optional
     * {@code inContextAnalysisFilename} entries of this package's configuration section, with
     * {@code filenameSuffix} inserted by {@code getFilename}. When the analysis file name is
     * configured, the analysis output is wrapped in {@code <html><body>} tags. A progress count
     * is printed to standard output every 1000 sentences.
     *
     * @param dataset dataset to evaluate against
     * @param tagger tagger under evaluation
     * @param config configuration supplying the tag format, tokenizer, post-processor and file
     *        names
     * @param filenameSuffix suffix to add to each output file name, or {@code null} for none
     * @return the accumulated performance
     * @throws IOException if an output file cannot be opened
     */
    public static Performance test(Dataset dataset, Tagger tagger, HierarchicalConfiguration config, String filenameSuffix) throws IOException {
        TagFormat tagFormat = BANNER.getTagFormat(config);
        Tokenizer tokenizer = BANNER.getTokenizer(config);
        PostProcessor postProcessor = BANNER.getPostProcessor(config);
        SubnodeConfiguration localConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String outputFilename = BANNER.getFilename(localConfig.getString("outputFilename"), filenameSuffix);
        String mentionFilename = BANNER.getFilename(localConfig.getString("mentionFilename"), filenameSuffix);
        String inContextAnalysisFilename = BANNER.getFilename(localConfig.getString("inContextAnalysisFilename"), filenameSuffix);
        PrintWriter outputFile = null;
        PrintWriter mentionFile = null;
        PrintWriter inContextAnalysisFile = null;
        Performance performance;
        try {
            // Opened inside the try so a failure on a later file still closes the earlier ones.
            outputFile = new PrintWriter(new BufferedWriter(new FileWriter(outputFilename)));
            mentionFile = new PrintWriter(new BufferedWriter(new FileWriter(mentionFilename)));
            if (inContextAnalysisFilename != null) {
                inContextAnalysisFile = new PrintWriter(new BufferedWriter(new FileWriter(inContextAnalysisFilename)));
            }
            System.out.println("\tTagging sentences");
            if (inContextAnalysisFile != null) {
                inContextAnalysisFile.println("<html><body>");
            }
            int count = 0;
            performance = new Performance(MatchCriteria.Strict);
            for (Sentence sentence : dataset.getSentences()) {
                if (count % 1000 == 0) {
                    System.out.println(count);
                }
                Sentence sentence2 = BANNER.process(tagger, tokenizer, postProcessor, sentence);
                outputFile.println(BANNER.getTrainingText(sentence2, tagFormat, EnumSet.of(Mention.MentionType.Required), Sentence.OverlapOption.Raw, Sentence.OverlapOption.Raw));
                BANNER.outputMentions(sentence2, mentionFile, false, true);
                if (inContextAnalysisFile != null) {
                    BANNER.outputAnalysis(sentence, sentence2, inContextAnalysisFile, false);
                }
                performance.update(sentence, sentence2);
                ++count;
            }
        }
        finally {
            if (outputFile != null) {
                outputFile.close();
            }
            if (mentionFile != null) {
                mentionFile.close();
            }
            if (inContextAnalysisFile != null) {
                // Close the HTML document even if tagging was cut short by an exception.
                inContextAnalysisFile.println("</body></html>");
                inContextAnalysisFile.close();
            }
        }
        return performance;
    }

    /**
     * Runs the tagging pipeline on a copy of the given sentence: tokenize, tag, post-process.
     *
     * <p>The copy is obtained with {@code sentence.copy(false, false)}; all three stages are
     * applied to that copy, in that order.
     *
     * @param tagger tagger applied after tokenization
     * @param tokenizer tokenizer applied first
     * @param postProcessor post-processor applied last
     * @param sentence source sentence
     * @return the processed copy
     */
    public static Sentence process(Tagger tagger, Tokenizer tokenizer, PostProcessor postProcessor, Sentence sentence) {
        final Sentence working = sentence.copy(false, false);
        // Stage order matters: tokens must exist before tagging, mentions before post-processing.
        tokenizer.tokenize(working);
        tagger.tag(working);
        postProcessor.postProcess(working);
        return working;
    }

    /**
     * Builds one HTML line for a sentence in which every token is coloured by how
     * the found mentions compare with the expected ones, and optionally prints it.
     *
     * <p>Token colours: green when the token lies in a correctly found mention,
     * purple when it lies in both an incorrectly found mention and a missed one,
     * red when it lies only in an incorrectly found mention, blue when it lies only
     * in a missed mention, and black otherwise.
     *
     * <p>The expected mentions are read from a copy of {@code sentenceRequired} that
     * has been passed through a {@code FlattenPostProcessor} of type {@code Union}.
     *
     * @param sentenceRequired sentence carrying the expected (Required / Allowed) mentions
     * @param sentenceFound sentence carrying the mentions to be judged; its tokens are the ones rendered
     * @param mentionOutputFile writer receiving the rendered line
     * @param outputIfCorrect when {@code true} the line is written even if no error was detected
     */
    private static void outputAnalysis(Sentence sentenceRequired, Sentence sentenceFound, PrintWriter mentionOutputFile, boolean outputIfCorrect) {
        Sentence sentenceRequired2 = sentenceRequired.copy(true, true);
        FlattenPostProcessor fpp = new FlattenPostProcessor(FlattenPostProcessor.FlattenType.Union);
        fpp.postProcess(sentenceRequired2);
        List<Mention> mentionsAllowed = sentenceRequired2.getMentions(Mention.MentionType.Allowed);
        Set<Mention> mentionsFoundCorrect = new HashSet<Mention>();
        Set<Mention> mentionsFoundIncorrect = new HashSet<Mention>();
        Set<Mention> mentionsNotFound = new HashSet<Mention>();
        mentionsNotFound.addAll(sentenceRequired2.getMentions(Mention.MentionType.Required));

        // NOTE(review): Performance.update reads MentionType.Found from the found sentence,
        // whereas this method reads MentionType.Required -- confirm which one is intended.
        for (Mention mention : sentenceFound.getMentions(Mention.MentionType.Required)) {
            if (mentionsNotFound.remove(mention)) {
                // Exact match with a required mention.
                mentionsFoundCorrect.add(mention);
            } else if (mentionsAllowed.contains(mention)) {
                // An allowed alternative: it also satisfies every required mention it overlaps.
                mentionsFoundCorrect.add(mention);
                // Iterate over a snapshot because the live set is modified inside the loop.
                for (Mention mentionRequired : new HashSet<Mention>(mentionsNotFound)) {
                    if (mention.overlaps(mentionRequired)) {
                        mentionsNotFound.remove(mentionRequired);
                    }
                }
            } else {
                mentionsFoundIncorrect.add(mention);
            }
        }

        boolean foundError = false;
        StringBuilder analysis = new StringBuilder(sentenceFound.getSentenceId());
        FontColor currentColor = FontColor.Black;
        List<Token> tokens = sentenceFound.getTokens();
        for (int i = 0; i < tokens.size(); ++i) {
            boolean inFoundCorrect = false;
            for (Mention mention : mentionsFoundCorrect) {
                inFoundCorrect |= mention.contains(i);
            }
            boolean inFoundIncorrect = false;
            for (Mention mention : mentionsFoundIncorrect) {
                inFoundIncorrect |= mention.contains(i);
            }
            boolean inNotFound = false;
            for (Mention mention : mentionsNotFound) {
                inNotFound |= mention.contains(i);
            }
            foundError |= inNotFound || inFoundIncorrect;

            FontColor nextColor;
            if (inFoundCorrect) {
                if (inFoundIncorrect || inNotFound) {
                    // A token in a correct mention should not also be in an error set;
                    // dump the full state to stdout before the assertions below fire.
                    System.out.println("=============");
                    System.out.println("inFoundIncorrect: " + inFoundIncorrect);
                    System.out.println("inNotFound: " + inNotFound);
                    System.out.println(sentenceFound.getSentenceId());
                    System.out.println(sentenceFound.getText());
                    Mention badMention = sentenceFound.getMentions(tokens.get(i), EnumSet.of(Mention.MentionType.Required)).get(0);
                    System.out.println("badMention: " + badMention);
                    System.out.println("sentenceFound.getMentions().contains(): " + sentenceFound.getMentions(Mention.MentionType.Required).contains(badMention));
                    System.out.println("mentionsRequired.contains(): " + sentenceRequired.getMentions(Mention.MentionType.Required).contains(badMention));
                    System.out.println("mentionsAllowed.contains(): " + mentionsAllowed.contains(badMention));
                    System.out.println("mentionsFoundCorrect.contains(): " + mentionsFoundCorrect.contains(badMention));
                    System.out.println("mentionsFoundIncorrect.contains(): " + mentionsFoundIncorrect.contains(badMention));
                    System.out.println("mentionsNotFound.contains(): " + mentionsNotFound.contains(badMention));
                    System.out.println("sentenceFound.getMentions(): " + sentenceFound.getMentions(Mention.MentionType.Required));
                    System.out.println("mentionsFoundCorrect: " + mentionsFoundCorrect);
                    System.out.println("mentionsFoundIncorrect: " + mentionsFoundIncorrect);
                    System.out.println("mentionsNotFound: " + mentionsNotFound);
                    System.out.println("=============");
                }
                assert (!inFoundIncorrect);
                assert (!inNotFound);
                nextColor = FontColor.Green;
            } else if (inFoundIncorrect && inNotFound) {
                nextColor = FontColor.Purple;
            } else if (inFoundIncorrect) {
                nextColor = FontColor.Red;
            } else if (inNotFound) {
                nextColor = FontColor.Blue;
            } else {
                nextColor = FontColor.Black;
            }
            analysis.append(currentColor.changeColor(nextColor));
            currentColor = nextColor;
            analysis.append(tokens.get(i).getText());
        }
        // Close any font tag that is still open, then end the HTML line.
        analysis.append(currentColor.changeColor(FontColor.Black));
        analysis.append("<br>");
        if (foundError || outputIfCorrect) {
            mentionOutputFile.println(analysis);
        }
    }

    /**
     * Instantiates and loads the dataset class named by the {@code datasetName}
     * parameter of this package's configuration section.
     *
     * <p>The class is created through its no-argument constructor, given the
     * tokenizer returned by {@link #getTokenizer(HierarchicalConfiguration)}, and
     * then asked to load itself from {@code config}.
     *
     * @param config full configuration; the section named after this class's package is consulted
     * @return the loaded dataset
     * @throws RuntimeException if {@code datasetName} is missing or the class cannot be instantiated
     */
    public static Dataset getDataset(HierarchicalConfiguration config) {
        Tokenizer tokenizer = BANNER.getTokenizer(config);
        SubnodeConfiguration localConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String datasetName = localConfig.getString("datasetName");
        if (datasetName == null) {
            throw new RuntimeException("Configuration must contain parameter \"datasetName\"");
        }
        Dataset dataset;
        try {
            // Class.newInstance() is deprecated; go through the no-arg constructor explicitly.
            dataset = (Dataset)Class.forName(datasetName).getDeclaredConstructor().newInstance();
        }
        catch (Exception e) {
            throw new RuntimeException("Unable to instantiate dataset class \"" + datasetName + "\"", e);
        }
        dataset.setTokenizer(tokenizer);
        dataset.load(config);
        return dataset;
    }

    /**
     * Reads the {@code tagFormat} parameter from this package's configuration
     * section and converts it with {@code TagFormat.valueOf}.
     *
     * @param config full configuration
     * @return the configured tag format
     */
    private static TagFormat getTagFormat(HierarchicalConfiguration config) {
        String sectionName = BANNER.class.getPackage().getName();
        SubnodeConfiguration section = config.configurationAt(sectionName);
        String formatName = section.getString("tagFormat");
        return TagFormat.valueOf(formatName);
    }

    /**
     * Instantiates the tokenizer class named by the {@code tokenizer} parameter of
     * this package's configuration section, using its no-argument constructor.
     * The class name is echoed to standard output before instantiation.
     *
     * @param config full configuration
     * @return a new tokenizer instance
     * @throws RuntimeException if {@code tokenizer} is missing or the class cannot be instantiated
     */
    public static Tokenizer getTokenizer(HierarchicalConfiguration config) {
        SubnodeConfiguration localConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String tokenizerName = localConfig.getString("tokenizer");
        if (tokenizerName == null) {
            throw new RuntimeException("Configuration must contain parameter \"tokenizer\"");
        }
        System.out.println("reading tokenizer: " + tokenizerName);
        try {
            // Class.newInstance() is deprecated; go through the no-arg constructor explicitly.
            return (Tokenizer)Class.forName(tokenizerName).getDeclaredConstructor().newInstance();
        }
        catch (Exception e) {
            throw new RuntimeException("Unable to instantiate tokenizer class \"" + tokenizerName + "\"", e);
        }
    }

    /**
     * Creates, configures and loads the dictionary tagger named by the
     * {@code dictionaryTagger} parameter of this package's configuration section.
     *
     * @param config full configuration
     * @return the loaded dictionary tagger, or {@code null} when no
     *         {@code dictionaryTagger} parameter is configured
     * @throws RuntimeException if the class cannot be instantiated, configured or loaded
     */
    public static DictionaryTagger getDictionary(HierarchicalConfiguration config) {
        // The tokenizer is obtained first, even when no dictionary turns out to be configured.
        Tokenizer tokenizer = BANNER.getTokenizer(config);
        SubnodeConfiguration localConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String dictionaryName = localConfig.getString("dictionaryTagger");
        if (dictionaryName == null) {
            return null;
        }
        try {
            // Class.newInstance() is deprecated; go through the no-arg constructor explicitly.
            DictionaryTagger dictionary = (DictionaryTagger)Class.forName(dictionaryName).getDeclaredConstructor().newInstance();
            dictionary.configure(config, tokenizer);
            dictionary.load(config);
            return dictionary;
        }
        catch (Exception e) {
            throw new RuntimeException("Unable to create dictionary tagger \"" + dictionaryName + "\"", e);
        }
    }

    /**
     * Assembles the post-processing chain selected by the configuration.
     *
     * <p>A {@code ParenthesisPostProcessor} is added when
     * {@code useParenthesisPostProcessing} is present and true, followed by a
     * {@code LocalAbbreviationPostProcessor} when
     * {@code useLocalAbbreviationPostProcessing} is present and true. With neither
     * flag set the returned chain has no processors added.
     *
     * @param config full configuration
     * @return a {@code SequentialPostProcessor} holding the enabled processors in that order
     */
    public static PostProcessor getPostProcessor(HierarchicalConfiguration config) {
        final String parenthesisKey = "useParenthesisPostProcessing";
        final String abbreviationKey = "useLocalAbbreviationPostProcessing";
        SubnodeConfiguration section = config.configurationAt(BANNER.class.getPackage().getName());
        SequentialPostProcessor chain = new SequentialPostProcessor();

        boolean parenthesisEnabled = section.containsKey(parenthesisKey) && section.getBoolean(parenthesisKey);
        if (parenthesisEnabled) {
            chain.addPostProcessor(new ParenthesisPostProcessor());
        }

        boolean abbreviationEnabled = section.containsKey(abbreviationKey) && section.getBoolean(abbreviationKey);
        if (abbreviationEnabled) {
            chain.addPostProcessor(new LocalAbbreviationPostProcessor());
        }
        return chain;
    }

    /**
     * Reads the integer {@code crfOrder} parameter from this package's
     * configuration section.
     *
     * @param config full configuration
     * @return the configured CRF order
     */
    private static int getCRFOrder(HierarchicalConfiguration config) {
        String sectionName = BANNER.class.getPackage().getName();
        return config.configurationAt(sectionName).getInt("crfOrder");
    }

    /**
     * Creates the part-of-speech tagger selected by the {@code posTagger}
     * parameter, which must be the fully qualified class name of either
     * {@code HeppleTagger} or {@code MedPostTagger}.
     *
     * @param config full configuration
     * @return the tagger, or {@code null} when {@code posTagger} is not configured
     * @throws IllegalArgumentException if {@code posTaggerDataDirectory} is missing
     *         or {@code posTagger} names an unsupported class
     */
    public static dragon.nlp.tool.Tagger getPosTagger(HierarchicalConfiguration config) {
        SubnodeConfiguration section = config.configurationAt(BANNER.class.getPackage().getName());
        String taggerClassName = section.getString("posTagger");
        if (taggerClassName == null) {
            return null;
        }
        String dataDirectory = section.getString("posTaggerDataDirectory");
        if (dataDirectory == null) {
            throw new IllegalArgumentException("Must specify data directory for POS tagger");
        }
        if (HeppleTagger.class.getName().equals(taggerClassName)) {
            // The Hepple tagger receives the directory resolved through Util.getFile.
            return new HeppleTagger(new Util().getFile(dataDirectory));
        } else if (MedPostTagger.class.getName().equals(taggerClassName)) {
            // The MedPost tagger receives the configured directory string unchanged.
            return new MedPostTagger(dataDirectory);
        } else {
            throw new IllegalArgumentException("Unknown POS tagger type: " + taggerClassName);
        }
    }

    /**
     * Creates an English lemmatiser backed by the {@code nlpdata/lemmatiser}
     * resource found through the thread context class loader.
     *
     * <p>A diagnostic line stating whether {@code adj.exec} exists under the
     * resolved directory is printed to standard output.
     *
     * @param config currently unused; the data directory is taken from the class path
     * @return the lemmatiser, or {@code null} when the {@code nlpdata/lemmatiser}
     *         resource cannot be found
     */
    public static EngLemmatiser getLemmatiser(HierarchicalConfiguration config) {
        // getResource returns null for a missing resource; check before dereferencing
        // so that the "no data directory" case yields null instead of a NullPointerException.
        java.net.URL resource = Thread.currentThread().getContextClassLoader().getResource("nlpdata/lemmatiser");
        if (resource == null) {
            return null;
        }
        // NOTE(review): URL.getFile() is not URL-decoded, so a path containing spaces
        // or other escaped characters may not resolve on disk -- confirm deployment paths.
        String lemmatiserDataDirectory = resource.getFile();
        File f = new File(lemmatiserDataDirectory + "/adj.exec");
        System.out.println(lemmatiserDataDirectory + " Exists?: " + f.exists());
        return new EngLemmatiser(lemmatiserDataDirectory, false, true);
    }

    /**
     * Reads the {@code simFindFilename} parameter from this package's
     * configuration section.
     *
     * @param config full configuration
     * @return the value returned by the configuration for {@code simFindFilename}
     */
    public static String getSimFindFilename(HierarchicalConfiguration config) {
        String sectionName = BANNER.class.getPackage().getName();
        return config.configurationAt(sectionName).getString("simFindFilename");
    }

    /**
     * Parses the whitespace-separated {@code mentionTypes} parameter into a set of
     * {@code Mention.MentionType} constants.
     *
     * <p>Leading and trailing whitespace around the list is ignored.
     *
     * @param config full configuration
     * @return a non-empty set of the configured mention types
     * @throws RuntimeException if {@code mentionTypes} is missing or contains no names
     * @throws IllegalArgumentException if a name is not a {@code Mention.MentionType} constant
     */
    private static Set<Mention.MentionType> getMentionTypes(HierarchicalConfiguration config) {
        SubnodeConfiguration localConfig = config.configurationAt(BANNER.class.getPackage().getName());
        String mentionTypesStr = localConfig.getString("mentionTypes");
        if (mentionTypesStr == null) {
            throw new RuntimeException("Configuration must contain parameter \"mentionTypes\"");
        }
        EnumSet<Mention.MentionType> mentionTypes = EnumSet.noneOf(Mention.MentionType.class);
        // Trim first: a leading separator would otherwise produce an empty first token.
        for (String mentionTypeName : mentionTypesStr.trim().split("\\s+")) {
            if (mentionTypeName.isEmpty()) {
                // Only happens for a blank value, where split yields a single empty token.
                continue;
            }
            mentionTypes.add(Mention.MentionType.valueOf(mentionTypeName));
        }
        if (mentionTypes.isEmpty()) {
            throw new RuntimeException("Configuration must contain parameter \"mentionTypes\"");
        }
        return mentionTypes;
    }

    /**
     * Reads the mandatory {@code sameTypeOverlapOption} parameter and converts it
     * with {@code Sentence.OverlapOption.valueOf}.
     *
     * @param config full configuration
     * @return the configured overlap option
     * @throws RuntimeException if the parameter is absent
     */
    private static Sentence.OverlapOption getSameTypeOverlapOption(HierarchicalConfiguration config) {
        SubnodeConfiguration section = config.configurationAt(BANNER.class.getPackage().getName());
        String optionName = section.getString("sameTypeOverlapOption");
        if (optionName != null) {
            return Sentence.OverlapOption.valueOf(optionName);
        }
        throw new RuntimeException("Configuration must contain parameter \"sameTypeOverlapOption\"");
    }

    /**
     * Reads the mandatory {@code differentTypeOverlapOption} parameter and converts
     * it with {@code Sentence.OverlapOption.valueOf}.
     *
     * @param config full configuration
     * @return the configured overlap option
     * @throws RuntimeException if the parameter is absent
     */
    private static Sentence.OverlapOption getDifferentTypeOverlapOption(HierarchicalConfiguration config) {
        SubnodeConfiguration section = config.configurationAt(BANNER.class.getPackage().getName());
        String optionName = section.getString("differentTypeOverlapOption");
        if (optionName != null) {
            return Sentence.OverlapOption.valueOf(optionName);
        }
        throw new RuntimeException("Configuration must contain parameter \"differentTypeOverlapOption\"");
    }

    /**
     * Counters for true positives, false positives and false negatives, with the
     * precision, recall and F-measure derived from them.
     */
    public static class PerformanceData {
        int tp = 0;
        int fp = 0;
        int fn = 0;

        /**
         * @return {@code tp / (tp + fp)}; NaN when {@code tp + fp} is zero
         */
        public double getPrecision() {
            return (double)this.tp / (double)(this.tp + this.fp);
        }

        /**
         * @return {@code tp / (tp + fn)}; NaN when {@code tp + fn} is zero
         */
        public double getRecall() {
            return (double)this.tp / (double)(this.tp + this.fn);
        }

        /**
         * Returns the harmonic mean of precision and recall.
         *
         * @return {@code 2pr / (p + r)}; {@code 0.0} when there are no true positives
         *         but at least one false positive or false negative; NaN when all
         *         three counters are zero
         */
        public double getFMeasure() {
            if (this.tp == 0) {
                // With no true positives the harmonic-mean formula degenerates to 0/0
                // (or involves a NaN precision/recall); the F-measure is zero as soon
                // as any error was counted, and undefined only when nothing was counted.
                return this.fp + this.fn == 0 ? Double.NaN : 0.0;
            }
            double p = this.getPrecision();
            double r = this.getRecall();
            return 2.0 * p * r / (p + r);
        }

        /**
         * Prints the three counters and the three derived measures to standard output.
         */
        public void print() {
            System.out.println("TP: " + this.tp);
            System.out.println("FP: " + this.fp);
            System.out.println("FN: " + this.fn);
            System.out.println("precision: " + this.getPrecision());
            System.out.println("   recall: " + this.getRecall());
            System.out.println("f-measure: " + this.getFMeasure());
        }
    }

    /**
     * Accumulates {@code PerformanceData} counters overall, per entity type and per
     * mention text while comparing expected sentences with tagged ones.
     */
    public static class Performance {
        private PerformanceData overall;
        private Map<EntityType, PerformanceData> perMention;
        private Map<String, PerformanceData> perText;

        /**
         * @param matchCriteria must be {@code MatchCriteria.Strict}
         * @throws IllegalArgumentException for any other criteria
         */
        public Performance(MatchCriteria matchCriteria) {
            if (matchCriteria != MatchCriteria.Strict) {
                throw new IllegalArgumentException("Not implemented");
            }
            this.perText = new HashMap<String, PerformanceData>();
            this.perMention = new HashMap<EntityType, PerformanceData>();
            this.overall = new PerformanceData();
        }

        /** Returns the counters for an entity type, creating them on first use. */
        private PerformanceData getMentionPerformanceData(EntityType type) {
            PerformanceData existing = this.perMention.get(type);
            if (existing != null) {
                return existing;
            }
            PerformanceData created = new PerformanceData();
            this.perMention.put(type, created);
            return created;
        }

        /** Returns the counters for a mention text, creating them on first use. */
        private PerformanceData getTextPerformanceData(String text) {
            PerformanceData existing = this.perText.get(text);
            if (existing != null) {
                return existing;
            }
            PerformanceData created = new PerformanceData();
            this.perText.put(text, created);
            return created;
        }

        /**
         * Compares the {@code Found} mentions of one sentence with the
         * {@code Required} and {@code Allowed} mentions of its expected
         * counterpart and updates all counters.
         *
         * <p>A found mention equal to a required one is a true positive. A found
         * mention equal to an allowed one adds a true positive for each
         * still-unmatched required mention it overlaps and is never a false
         * positive. Any other found mention is a false positive. Required mentions
         * left unmatched at the end are false negatives.
         *
         * @param sentenceRequired sentence carrying the expected mentions
         * @param sentenceFound sentence carrying the tagger's mentions
         */
        public void update(Sentence sentenceRequired, Sentence sentenceFound) {
            Set<Mention> outstanding = new HashSet<Mention>(sentenceRequired.getMentions(Mention.MentionType.Required));
            List<Mention> allowed = sentenceRequired.getMentions(Mention.MentionType.Allowed);
            List<Mention> candidates = sentenceFound.getMentions(Mention.MentionType.Found);
            for (Mention candidate : candidates) {
                if (outstanding.remove(candidate)) {
                    this.countTruePositive(candidate);
                    continue;
                }
                if (allowed.contains(candidate)) {
                    // Iterate over a snapshot because matches are removed from the live set.
                    for (Mention required : new HashSet<Mention>(outstanding)) {
                        if (candidate.overlaps(required)) {
                            outstanding.remove(required);
                            this.countTruePositive(required);
                        }
                    }
                    continue;
                }
                this.overall.fp += 1;
                this.getMentionPerformanceData((EntityType)candidate.getEntityType()).fp += 1;
                this.getTextPerformanceData((String)candidate.getText()).fp += 1;
            }
            for (Mention missed : outstanding) {
                this.overall.fn += 1;
                this.getMentionPerformanceData((EntityType)missed.getEntityType()).fn += 1;
                this.getTextPerformanceData((String)missed.getText()).fn += 1;
            }
        }

        /** Adds one true positive to the overall, per-type and per-text counters. */
        private void countTruePositive(Mention mention) {
            this.overall.tp += 1;
            this.getMentionPerformanceData((EntityType)mention.getEntityType()).tp += 1;
            this.getTextPerformanceData((String)mention.getText()).tp += 1;
        }

        /** @return the overall counters (the live object, not a copy) */
        public PerformanceData getOverall() {
            return this.overall;
        }

        /** @return an unmodifiable view of the per-entity-type counters */
        public Map<EntityType, PerformanceData> getPerMention() {
            return Collections.unmodifiableMap(this.perMention);
        }

        /** @return an unmodifiable view of the per-mention-text counters */
        public Map<String, PerformanceData> getPerText() {
            return Collections.unmodifiableMap(this.perText);
        }

        /**
         * Prints the overall figures followed by one section per entity type to
         * standard output. Per-text figures are not printed.
         */
        public void print() {
            System.out.println("OVERALL: ");
            this.overall.print();
            for (Map.Entry<EntityType, PerformanceData> entry : this.perMention.entrySet()) {
                System.out.println();
                System.out.println("TYPE: \"" + entry.getKey().getText() + "\"");
                entry.getValue().print();
            }
        }
    }

    /**
     * Names of mention-matching criteria. The {@code Performance} constructor in
     * this file accepts only {@link #Strict} and rejects every other constant with
     * an {@code IllegalArgumentException}.
     */
    public static enum MatchCriteria {
        Strict,
        Left,
        Right,
        LeftOrRight,
        Approximate,
        Partial
    }

    /**
     * Colours used when rendering analysis output with HTML {@code <font>} tags.
     * Black is treated as the untagged colour: no tag is opened for it and none
     * is closed when leaving it.
     */
    private static enum FontColor {
        Black,
        Blue,
        Green,
        Red,
        Purple;

        /**
         * @return the constant's name in lower case, as written into the
         *         {@code color} attribute
         */
        @Override
        public String toString() {
            return this.name().toLowerCase();
        }

        /**
         * Produces the markup needed to switch from this colour to another one.
         * The result always contains a single space, preceded by a closing tag
         * when a non-black colour is being left and followed by an opening tag
         * when a non-black colour is being entered.
         *
         * @param newColor the colour to switch to
         * @return the separator text, just {@code " "} when the colour is unchanged
         */
        public String changeColor(FontColor newColor) {
            if (this == newColor) {
                return " ";
            }
            String closing = this == Black ? "" : "</font>";
            String opening = newColor == Black ? "" : "<font color=\"" + newColor.toString() + "\">";
            return closing + " " + opening;
        }
    }

    /**
     * A mutable integer counter that starts at zero.
     */
    private static class Count {
        private int count;

        /** @return the current value */
        public int getCount() {
            return count;
        }

        /** @param count the value to store */
        public void setCount(int count) {
            this.count = count;
        }

        /** Increases the value by one. */
        public void incr() {
            count += 1;
        }
    }

    /**
     * A dataset made of the sentences of several other datasets. It can be neither
     * split nor loaded from configuration.
     */
    private static class DatasetCombiner extends Dataset {
        /**
         * @param datasets datasets whose sentences are added, in iteration order,
         *        to this dataset's {@code sentences}
         */
        public DatasetCombiner(Collection<Dataset> datasets) {
            for (Dataset part : datasets) {
                sentences.addAll(part.getSentences());
            }
        }

        /** Not supported; always throws {@code UnsupportedOperationException}. */
        @Override
        public List<Dataset> split(int n) {
            throw new UnsupportedOperationException();
        }

        /** Not supported; always throws {@code UnsupportedOperationException}. */
        @Override
        public void load(HierarchicalConfiguration config) {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Identifiers for the operations this class distinguishes. The constant names
     * are written in lower camel case rather than the usual upper-case style.
     */
    private static enum Function {
        help,
        tag,
        test,
        train,
        eval5by2,
        eval10Fold,
        describeDataset,
        testDict,
        coordEllipsis
    }
}

