/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.geneexpbase.data;

import de.julielab.geneexpbase.data.DocumentSourceFiles;
import de.julielab.geneexpbase.genemodel.GeneMention;
import java.util.EnumSet;
import java.util.List;

/**
 * Catalogue of static factory methods, each of which builds a freshly
 * configured {@link DocumentSourceFiles} instance for one corpus / tagger
 * combination.
 *
 * <p>Every call returns a new object, so callers may modify the result
 * without affecting later calls.
 *
 * <p>NOTE(review): within each factory the setters are issued in a fixed
 * order (mesh/substances paths, then the base path, then the remaining
 * paths). Whether {@link DocumentSourceFiles} depends on that order is not
 * visible from this file, so the order is deliberately kept as-is, including
 * seemingly redundant re-sets in derived configurations.
 */
public class DocumentSourceFileRegistry {
    /** Directory prefix used by most corpora for their annotation files. */
    private static final String ANNOTATED_DIR = "annotated/";

    /** Prefix for corpora whose base path already points at the annotation directory. */
    private static final String NO_DIR = "";

    /**
     * Gene type labels accepted by most configurations.
     *
     * <p>NOTE(review): the spelling {@code protein_familiy_or_group} is kept
     * verbatim; presumably it has to match labels in the annotation data --
     * confirm before "correcting" it.
     */
    private static final List<String> GENE_AND_PROTEIN_TYPES =
            List.of("Gene", "protein", "protein_complex", "protein_enum", "protein_familiy_or_group");

    /**
     * Builds a new mutable tagger set consisting of the Flair tagger variant
     * plus consistency tagger, expansion tagger and gazetteer. A fresh set is
     * created per call because {@link EnumSet} is mutable.
     */
    private static EnumSet<GeneMention.GeneTagger> flairConsistencyExpansionGazetteerTaggers() {
        return EnumSet.of(
                GeneMention.GeneTagger.FLAIR_JPG_NOBC2TEST_NOTEST_COLLAPSED_VAR,
                GeneMention.GeneTagger.CONSISTENCY_TAGGER,
                GeneMention.GeneTagger.EXPANSION_TAGGER,
                GeneMention.GeneTagger.GAZETTEER);
    }

    /** Builds a new mutable tagger set consisting of the Flair tagger variant and the gazetteer. */
    private static EnumSet<GeneMention.GeneTagger> flairAndGazetteerTaggers() {
        return EnumSet.of(
                GeneMention.GeneTagger.FLAIR_JPG_NOBC2TEST_NOTEST_COLLAPSED_VAR,
                GeneMention.GeneTagger.GAZETTEER);
    }

    /**
     * Sets the eleven annotation paths (predicted genes through document
     * text) used by the fully annotated corpora, in their original order.
     *
     * @param files the configuration to modify
     * @param dir   prefix prepended to every file name, e.g. {@code "annotated/"} or {@code ""}
     */
    private static void applyFullAnnotationPaths(DocumentSourceFiles files, String dir) {
        String annotations = dir + "annotations.tsv.gz";
        files.setPredictedGenesPath(dir + "genes.tsv.gz");
        files.setSentencesPath(annotations);
        files.setChunksPath(annotations);
        files.setNonGenePhrasesPath(annotations);
        files.setPosPath(annotations);
        files.setOntologyMentionsPath(annotations);
        files.setSpeciesPath(annotations);
        files.setAcronymsPath(dir + "acronyms.tsv.gz");
        files.setCorefPath(dir + "coreferences.tsv.gz");
        files.setAppositionsPath(dir + "appositions.tsv.gz");
        files.setDocTextPath(dir + "text");
    }

    /**
     * Sets the eight annotation paths used by the BC3 corpora (no ontology
     * mentions, coreferences or appositions), in their original order.
     *
     * @param files the configuration to modify
     */
    private static void applyBc3AnnotationPaths(DocumentSourceFiles files) {
        String annotations = ANNOTATED_DIR + "annotations.tsv.gz";
        files.setPredictedGenesPath(ANNOTATED_DIR + "genes.tsv.gz");
        files.setSentencesPath(annotations);
        files.setChunksPath(annotations);
        files.setNonGenePhrasesPath(annotations);
        files.setPosPath(annotations);
        files.setSpeciesPath(annotations);
        files.setAcronymsPath(ANNOTATED_DIR + "acronyms.tsv.gz");
        files.setDocTextPath(ANNOTATED_DIR + "text");
    }

    /**
     * Sets the six annotation paths shared by the species corpus
     * configurations (sentences through document text), in their original
     * order. The predicted-genes path differs between those configurations
     * and must be set by the caller beforehand.
     *
     * @param files the configuration to modify
     */
    private static void applySpeciesCorpusAnnotationPaths(DocumentSourceFiles files) {
        String annotations = ANNOTATED_DIR + "annotations.tsv.gz";
        files.setSentencesPath(annotations);
        files.setChunksPath(annotations);
        files.setPosPath(annotations);
        files.setSpeciesPath(annotations);
        files.setAcronymsPath(ANNOTATED_DIR + "acronyms.tsv.gz");
        files.setDocTextPath(ANNOTATED_DIR + "text");
    }

    /**
     * Configuration named {@code decaSpeciesCorpus}, rooted at
     * {@code species_corpus_0.2}, using only the {@code GOLD} tagger and
     * {@code gold.taxlist} as both predicted-genes path and gold list.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles decaSpeciesCorpus() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("decaSpeciesCorpus");
        files.setHasGeneIds(false);
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GOLD));
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setSpeciesCorpus(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/species_corpus_0.2/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/species_corpus_0.2/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/species_corpus_0.2");
        files.setPredictedGenesPath("gold.taxlist");
        applySpeciesCorpusAnnotationPaths(files);
        files.setGoldGeneList("gold.taxlist");
        return files;
    }

    /**
     * Species corpus configuration rooted at {@code species_corpus_custom_data}
     * that uses only the {@code GOLD} tagger and reads predicted genes from
     * {@code gold.taxlist}.
     *
     * <p>NOTE(review): shares the name {@code speciesCorpusCustomGnormPlusBC2Train}
     * with {@link #speciesCorpusCustomGnormPlusBC2TrainFlairGenes()} -- confirm
     * that the name does not need to be unique.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles speciesCorpusCustomGnormPlusBC2TrainGoldGenes() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("speciesCorpusCustomGnormPlusBC2Train");
        files.setHasGeneIds(false);
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GOLD));
        files.setAllowedGeneTypes(List.of("Gene"));
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setSpeciesCorpus(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/train/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/train/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/species_corpus_custom_data");
        files.setPredictedGenesPath("gold.taxlist");
        applySpeciesCorpusAnnotationPaths(files);
        files.setGoldGeneList("gold.taxlist");
        return files;
    }

    /**
     * Same corpus as {@link #speciesCorpusCustomGnormPlusBC2TrainGoldGenes()}
     * but with the Flair/consistency/expansion/gazetteer tagger set and
     * predicted genes read from {@code annotated/genes.tsv.gz}.
     *
     * <p>NOTE(review): the name is identical to the gold-genes variant --
     * confirm this is intended.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles speciesCorpusCustomGnormPlusBC2TrainFlairGenes() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("speciesCorpusCustomGnormPlusBC2Train");
        files.setHasGeneIds(false);
        files.setTaggersToUse(flairConsistencyExpansionGazetteerTaggers());
        files.setAllowedGeneTypes(List.of("Gene"));
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setSpeciesCorpus(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/train/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/train/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/species_corpus_custom_data");
        files.setPredictedGenesPath("annotated/genes.tsv.gz");
        applySpeciesCorpusAnnotationPaths(files);
        files.setGoldGeneList("gold.taxlist");
        return files;
    }

    /**
     * Configuration named {@code gnpBc2gnTrainFlPgCvGaz}, rooted at
     * {@code gnormplus_data/bc2train}, with gold list {@code bc2train.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles gnpBc2gnTrain() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("gnpBc2gnTrainFlPgCvGaz");
        files.setTaggersToUse(flairConsistencyExpansionGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/train/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/train/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/gnormplus_data/bc2train");
        applyFullAnnotationPaths(files, ANNOTATED_DIR);
        files.setGoldGeneList("bc2train.genelist");
        return files;
    }

    /**
     * Configuration named {@code gnpBc2gnTestFlPgCvGaz}, rooted at
     * {@code gnormplus_data/bc2test}, with gold list {@code bc2test.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles gnpBc2gnTest() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("gnpBc2gnTestFlPgCvGaz");
        files.setTaggersToUse(flairConsistencyExpansionGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/test/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/test/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/gnormplus_data/bc2test");
        applyFullAnnotationPaths(files, ANNOTATED_DIR);
        files.setGoldGeneList("bc2test.genelist");
        return files;
    }

    /**
     * Configuration named {@code gnpBc2gnTestFlPgCvGazGnpEntities}. Its base
     * path points directly at {@code bc2test/annotated_progene_gnp_entities},
     * so annotation file names carry no directory prefix and the gold list is
     * referenced one level up.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles gnpBc2gnTestFlairProGeneGnpEntities() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("gnpBc2gnTestFlPgCvGazGnpEntities");
        files.setTaggersToUse(EnumSet.of(
                GeneMention.GeneTagger.FLAIR_JPG_GNP_ENTITIES,
                GeneMention.GeneTagger.CONSISTENCY_TAGGER,
                GeneMention.GeneTagger.EXPANSION_TAGGER,
                GeneMention.GeneTagger.GAZETTEER));
        files.setAllowedGeneTypes(List.of("Gene", "FamilyName"));
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/test/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/test/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/gnormplus_data/bc2test/annotated_progene_gnp_entities");
        applyFullAnnotationPaths(files, NO_DIR);
        files.setGoldGeneList("../bc2test.genelist");
        return files;
    }

    /**
     * Variant of {@link #gnpBc2gnTestFlairProGeneGnpEntities()} with a
     * different name and base path
     * ({@code annotated_progene_gnp_entities_consis_longgaz}).
     *
     * <p>The annotation paths and gold list are set again after the base path
     * changes, exactly as in the original code, in case the setters depend on
     * the current base path.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles gnpBc2gnTestFlairProGeneGnpEntitiesConsistencyLongerGazetteer() {
        DocumentSourceFiles files = DocumentSourceFileRegistry.gnpBc2gnTestFlairProGeneGnpEntities();
        files.setName("gnpBc2gnTestFlPgCvGazGnpEntitiesConsistencyLongerGaz");
        files.setBasePath("../built-resources/corpora/gnormplus_data/bc2test/annotated_progene_gnp_entities_consis_longgaz");
        applyFullAnnotationPaths(files, NO_DIR);
        files.setGoldGeneList("../bc2test.genelist");
        return files;
    }

    /**
     * Configuration named {@code bc2gntest}, rooted at {@code bc2_data/test},
     * with gold list {@code test.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles bc2gntest() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("bc2gntest");
        files.setTaggersToUse(flairConsistencyExpansionGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/test/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/test/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/bc2_data/test");
        applyFullAnnotationPaths(files, ANNOTATED_DIR);
        files.setGoldGeneList("test.genelist");
        return files;
    }

    /**
     * Configuration named {@code gnpNlmIat}, rooted at
     * {@code gnormplus_data/nlmiat}, with gold list {@code nlmiat.genelist}.
     * No mesh or substances path is set.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles gnpNlmIat() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("gnpNlmIat");
        files.setTaggersToUse(flairConsistencyExpansionGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setBasePath("../built-resources/corpora/gnormplus_data/nlmiat");
        applyFullAnnotationPaths(files, ANNOTATED_DIR);
        files.setGoldGeneList("nlmiat.genelist");
        return files;
    }

    /**
     * Configuration named {@code bc3Trainset1}, rooted at
     * {@code bc3_data/trainset1}, flagged as not completely annotated, with
     * gold list {@code trainset1.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles bc3Trainset1() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("bc3Trainset1");
        files.setTaggersToUse(flairAndGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(false);
        files.setBasePath("../built-resources/corpora/bc3_data/trainset1");
        applyBc3AnnotationPaths(files);
        files.setGoldGeneList("trainset1.genelist");
        return files;
    }

    /**
     * Configuration named {@code bc3Test50}, rooted at
     * {@code bc3_data/test50}, flagged as not completely annotated, with gold
     * list {@code test50.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles bc3Test50() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("bc3Test50");
        files.setTaggersToUse(flairAndGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(false);
        files.setBasePath("../built-resources/corpora/bc3_data/test50");
        applyBc3AnnotationPaths(files);
        files.setGoldGeneList("test50.genelist");
        return files;
    }

    /**
     * {@link #bc3Trainset1()} with document-level label inference switched on
     * and the completely-annotated flag set to {@code true}.
     *
     * <p>NOTE(review): the name stays {@code bc3Trainset1}; confirm that
     * sharing the name with the base configuration is intended.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles bc3Trainset1InferredMentionIds() {
        DocumentSourceFiles files = DocumentSourceFileRegistry.bc3Trainset1();
        files.setInferDocumentLevelLabelsToMentions(true);
        files.setCompletelyAnnotated(true);
        return files;
    }

    /**
     * Configuration named {@code bc3Trainset2}, rooted at
     * {@code bc3_data/trainset2}, with document-level label inference enabled,
     * flagged as not completely annotated, gold list
     * {@code trainset2.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles bc3Trainset2() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("bc3Trainset2");
        files.setTaggersToUse(flairAndGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setInferDocumentLevelLabelsToMentions(true);
        files.setCompletelyAnnotated(false);
        files.setBasePath("../built-resources/corpora/bc3_data/trainset2");
        applyBc3AnnotationPaths(files);
        files.setGoldGeneList("trainset2.genelist");
        return files;
    }

    /**
     * Species-corpus variant of {@link #bc3Trainset1()} using
     * {@code trainset1.taxlist} as gold list.
     *
     * <p>NOTE(review): this sets {@code hasGeneIds} to {@code true}, whereas
     * {@link #bc3Trainset2Species()} and the other species corpus
     * configurations set it to {@code false} -- verify which is intended.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles bc3Trainset1Species() {
        DocumentSourceFiles files = DocumentSourceFileRegistry.bc3Trainset1();
        files.setName("bc3Trainset1Species");
        files.setHasGeneIds(true);
        files.setGoldGeneList("trainset1.taxlist");
        files.setSpeciesCorpus(true);
        files.setInferDocumentLevelLabelsToMentions(true);
        return files;
    }

    /**
     * Species-corpus variant of {@link #bc3Trainset2()} using
     * {@code trainset2.taxlist} as gold list and {@code hasGeneIds == false}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles bc3Trainset2Species() {
        DocumentSourceFiles files = DocumentSourceFileRegistry.bc3Trainset2();
        files.setName("bc3Trainset2Species");
        files.setHasGeneIds(false);
        files.setGoldGeneList("trainset2.taxlist");
        files.setSpeciesCorpus(true);
        files.setInferDocumentLevelLabelsToMentions(true);
        return files;
    }

    /**
     * {@link #gnpBc2gnTrain()} switched to the {@code GNORM_PLUS} tagger with
     * predicted genes read from {@code bc2gntrain.genormplusoutput.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles gnpBc2gnTrainGnpPred() {
        DocumentSourceFiles files = DocumentSourceFileRegistry.gnpBc2gnTrain();
        files.setName("gnpBc2gnTrainGnpPred");
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GNORM_PLUS));
        files.setPredictedGenesPath("bc2gntrain.genormplusoutput.genelist");
        return files;
    }

    /**
     * {@link #gnpBc2gnTrain()} switched to the {@code GNORM_PLUS} tagger with
     * a wider set of allowed types, species filtering and reference species
     * disabled, and predicted genes, species and sentences read from the
     * {@code bc2gntrain.genormplusoutput*} files.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles gnpBc2gnTrainGnpPredSpeciesAfterSr() {
        DocumentSourceFiles files = DocumentSourceFileRegistry.gnpBc2gnTrain();
        files.setName("gnpBc2gnTrainGnpPredSpeciesAfterSr");
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GNORM_PLUS));
        files.setAllowedGeneTypes(List.of("Gene", "FamilyName", "DomainMotif", "Cell"));
        files.setFilterSpecies(false);
        files.setAddReferenceSpecies(false);
        files.setPredictedGenesPath("bc2gntrain.genormplusoutput_after_sr.taxlist");
        files.setSpeciesPath("bc2gntrain.genormplusoutput.taxlist");
        files.setSentencesPath("bc2gntrain.genormplusoutput_sentences_from_sr.tsv");
        return files;
    }

    /**
     * {@link #gnpBc2gnTrainGnpPred()} with species read from
     * {@code bc2gntrain.genormplusoutput.taxlist} and species filtering and
     * reference species disabled.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles gnpBc2gnTrainGnpPredGnpSpecies() {
        DocumentSourceFiles files = DocumentSourceFileRegistry.gnpBc2gnTrainGnpPred();
        files.setName("gnpBc2gnTrainGnpPredGnpSpecies");
        // Already GNORM_PLUS via gnpBc2gnTrainGnpPred(); the re-set is kept from the original code.
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GNORM_PLUS));
        files.setSpeciesPath("bc2gntrain.genormplusoutput.taxlist");
        files.setFilterSpecies(false);
        files.setAddReferenceSpecies(false);
        return files;
    }

    /**
     * Configuration named {@code nlmGeneTrain}, rooted at
     * {@code nlm_gene/train}, with gold list {@code nlmgenetrain.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles nlmGeneTrain() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("nlmGeneTrain");
        files.setTaggersToUse(flairConsistencyExpansionGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setBasePath("../built-resources/corpora/nlm_gene/train");
        applyFullAnnotationPaths(files, ANNOTATED_DIR);
        files.setGoldGeneList("nlmgenetrain.genelist");
        return files;
    }

    /**
     * Configuration named {@code nlmGeneTest}, rooted at
     * {@code nlm_gene/test}, with gold list
     * {@code nlmgenetest.nofamilies.genelist}.
     *
     * <p>NOTE(review): unlike the sibling configurations this one uses
     * {@code GeneTagger.FLAIR} instead of
     * {@code FLAIR_JPG_NOBC2TEST_NOTEST_COLLAPSED_VAR}; left unchanged --
     * confirm that this is deliberate.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles nlmGeneTest() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        // Previously "nlmGeneTrain" (copy-paste from nlmGeneTrain()), which made the
        // test configuration indistinguishable from the training one by name.
        files.setName("nlmGeneTest");
        files.setTaggersToUse(EnumSet.of(
                GeneMention.GeneTagger.FLAIR,
                GeneMention.GeneTagger.CONSISTENCY_TAGGER,
                GeneMention.GeneTagger.EXPANSION_TAGGER,
                GeneMention.GeneTagger.GAZETTEER));
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setBasePath("../built-resources/corpora/nlm_gene/test");
        applyFullAnnotationPaths(files, ANNOTATED_DIR);
        files.setGoldGeneList("nlmgenetest.nofamilies.genelist");
        return files;
    }

    /**
     * Configuration named {@code unitTests}, rooted at
     * {@code gnormplus_bc2_unittest_data}, reusing the BC2 training mesh and
     * substances files, with gold list {@code unitTests.genelist}.
     *
     * @return a new configuration instance
     */
    public static DocumentSourceFiles unitTests() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("unitTests");
        files.setTaggersToUse(flairConsistencyExpansionGazetteerTaggers());
        files.setAllowedGeneTypes(GENE_AND_PROTEIN_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/train/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/train/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/gnormplus_bc2_unittest_data");
        applyFullAnnotationPaths(files, ANNOTATED_DIR);
        files.setGoldGeneList("unitTests.genelist");
        return files;
    }
}

