/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.resources.uima;

import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.genemapper.WikipediaCategoryManager;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.ae.genemapper.desc.WikipediaFamilyParsing.Entity;
import de.julielab.jcore.ae.genemapper.desc.WikipediaFamilyParsing.EntityChunk;
import de.julielab.jcore.ae.genemapper.desc.WikipediaFamilyParsing.UnspecTitle;
import de.julielab.jcore.types.EntityMention;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.wikipedia.Title;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="JCoRe GeneMapper Wikipedia Index Writer", description="Expects CASes read by the GeneMapper Wikipedia Reader and processed by the entity class annotation RUTA analysis engine.. Creates an index of the read page excerpts and adds the entity class of the page as extracted by the RUTA component. This is supposed to help in the classification of gene/protein families and groups.")
public class WikipediaIndexWriter
extends JCasAnnotator_ImplBase {
    public static final String PARAM_INDEX_DIRECTORY = "IndexDirectory";
    public static final String PARAM_REDIRECT_MAP = "RedirectMap";
    public static final String PARAM_WIKIPEDIA_CATEGORY_TREE_PATH = "WikipediaCategoryTreePath";
    private static final Logger log = LoggerFactory.getLogger(WikipediaIndexWriter.class);
    private static IndexWriter iw;
    private static Map<String, List<String>> redirectMap;
    private static WikipediaCategoryManager wikipediaCategoryManager;
    @ConfigurationParameter(name="IndexDirectory", description="The path for the index to be created. An already existing index will be overwritten.")
    private String indexDirectoryPath;
    @ConfigurationParameter(name="RedirectMap", description="Optional. File that maps page titles to the titles of pages redirecting to it. If given, those redirect titles are added to the 'title' field of the respective document.")
    private String redirectMapPath;
    @ConfigurationParameter(name="WikipediaCategoryTreePath", mandatory=false, description="Optional. File created by GeNo's 'WikipediaCategoryTreeAndRedirectsExtractor' class that represents a map from page and category titles to categories they belong to. Will be used to filter for pages that are in some way related to the Molecular Biology category. Will also add the category path from Molecular Biology to the indexed page to the index.")
    private String wikipediaCategoryTreePath;
    private final TermNormalizer termNormalizer = new TermNormalizer();
    private final Set<String> prohibitedMolecularBiologyPathElements = Set.of("Category:Water", "Category:Human geography", "Category:People", "Category:Bodies of water", "Category:Reasoning", "Category:Cognition", "Category:Cars", "Category:Aggression", "Category:Reproduction", "Category:Genealogy", "Category:Artificial intelligence", "Category:Taxa", "Category:Anatomy", "Category:Neuroscience", "Category:Human names", "Category:Botany", "Category:Philosophy of biology");

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public void initialize(UimaContext context) throws ResourceInitializationException {
        this.indexDirectoryPath = (String)context.getConfigParameterValue(PARAM_INDEX_DIRECTORY);
        this.redirectMapPath = (String)context.getConfigParameterValue(PARAM_REDIRECT_MAP);
        this.wikipediaCategoryTreePath = (String)context.getConfigParameterValue(PARAM_WIKIPEDIA_CATEGORY_TREE_PATH);
        Class<WikipediaIndexWriter> clazz = WikipediaIndexWriter.class;
        synchronized (WikipediaIndexWriter.class) {
            try {
                Path indexPath = Path.of(this.indexDirectoryPath, new String[0]);
                File indexFile = indexPath.toFile();
                if (!indexFile.exists()) {
                    log.info("Creating index directory {}.", (Object)indexPath);
                    indexFile.mkdirs();
                }
                if (iw == null) {
                    IndexWriterConfig iwc = new IndexWriterConfig((Analyzer)new WhitespaceAnalyzer());
                    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
                    FSDirectory indexDirectory = FSDirectory.open((Path)indexPath);
                    iw = new IndexWriter((Directory)indexDirectory, iwc);
                }
            }
            catch (IOException e) {
                log.error("IOException while initializing the index directory.", (Throwable)e);
                throw new ResourceInitializationException((Throwable)e);
            }
            if (redirectMap == null) {
                try {
                    redirectMap = this.readRedirectMap(this.redirectMapPath);
                }
                catch (IOException e) {
                    log.error("IOException while reading the Wikipedia redirect map.", (Throwable)e);
                    throw new ResourceInitializationException((Throwable)e);
                }
            }
            if (this.wikipediaCategoryTreePath != null && wikipediaCategoryManager == null) {
                String root = "Category:Biology";
                log.info("Creating Dijkstra tree for {}. Prohibited path elements: {}", (Object)root, this.prohibitedMolecularBiologyPathElements);
                wikipediaCategoryManager = new WikipediaCategoryManager(this.wikipediaCategoryTreePath, true);
                wikipediaCategoryManager.buildDijkstraTree(root);
            }
            // ** MonitorExit[var2_2] (shouldn't be in output)
            return;
        }
    }

    private Map<String, List<String>> readRedirectMap(String redirectMapPath) throws IOException {
        try (BufferedReader br = FileUtilities.getReaderFromFile((File)new File(redirectMapPath));){
            Map<String, List<String>> map = br.lines().skip(1L).map(line -> line.split("\\t")).collect(Collectors.toMap(s -> s[0], s -> {
                ArrayList<String> l = new ArrayList<String>();
                l.add(s[1].intern());
                return l;
            }, (l1, l2) -> {
                l1.addAll(l2);
                return l1;
            }));
            return map;
        }
    }

    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();
        try {
            iw.close();
        }
        catch (IOException e) {
            log.error("Could not close index writer", (Throwable)e);
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        Document doc = this.createDocument(jCas);
        try {
            if (doc != null) {
                iw.addDocument((Iterable)doc);
            }
        }
        catch (IOException e) {
            log.error("Could not index document {}", (Object)doc, (Object)e);
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }

    private Document createDocument(JCas jCas) {
        Title pageTitle;
        Header header = (Header)JCasUtil.selectSingle((JCas)jCas, Header.class);
        Map wikipediaTitleIndex = JCasUtil.indexCovered((JCas)jCas, EntityChunk.class, EntityMention.class);
        Optional pageTitleOpt = JCasUtil.select((JCas)jCas, Title.class).stream().findAny();
        Optional unspecTitleOpt = JCasUtil.select((JCas)jCas, UnspecTitle.class).stream().findAny();
        Collection entityChunks = JCasUtil.select((JCas)jCas, EntityChunk.class);
        Collection entities = JCasUtil.select((JCas)jCas, Entity.class);
        boolean titleIsInPlural = false;
        List path = null;
        if (wikipediaCategoryManager != null && (path = wikipediaCategoryManager.getShortestPathToDijkstraTreeRoot(header.getTitle(), this.prohibitedMolecularBiologyPathElements)).isEmpty()) {
            log.debug("Skipping page {} because no path to the category graph root was found.", (Object)header.getTitle());
            return null;
        }
        if (pageTitleOpt.isPresent() && ((pageTitle = (Title)pageTitleOpt.get()).getEnd() + 1 < jCas.getDocumentText().length() && jCas.getDocumentText().charAt(pageTitle.getEnd()) == 's' || pageTitle.getEnd() + 2 < jCas.getDocumentText().length() && jCas.getDocumentText().charAt(pageTitle.getEnd()) == 'e' && jCas.getDocumentText().charAt(pageTitle.getEnd() + 1) == 's')) {
            titleIsInPlural = true;
        }
        Document doc = new Document();
        doc.add((IndexableField)new StringField("pageid", header.getDocId(), Field.Store.YES));
        doc.add((IndexableField)new TextField("title", this.termNormalizer.normalize(header.getTitle()), Field.Store.NO));
        doc.add((IndexableField)new StoredField("title", header.getTitle()));
        for (String redirectTitle : redirectMap.getOrDefault(header.getTitle(), Collections.emptyList())) {
            doc.add((IndexableField)new TextField("title", this.termNormalizer.normalize(redirectTitle), Field.Store.NO));
            doc.add((IndexableField)new StoredField("title", redirectTitle));
        }
        if (unspecTitleOpt.isPresent()) {
            doc.add((IndexableField)new StringField("hasunspectitle", "true", Field.Store.YES));
        }
        if (titleIsInPlural) {
            doc.add((IndexableField)new StringField("titleisinplural", "true", Field.Store.YES));
        }
        for (EntityChunk chunk : entityChunks) {
            doc.add((IndexableField)new TextField("entitychunks", this.termNormalizer.normalize(chunk.getCoveredText()), Field.Store.YES));
        }
        for (Entity entity : entities) {
            doc.add((IndexableField)new TextField("entities", this.termNormalizer.normalize(entity.getCoveredText()), Field.Store.YES));
        }
        for (EntityChunk chunk : entityChunks) {
            Collection wikipediaTitleInEntityChunk = (Collection)wikipediaTitleIndex.get(chunk);
            for (EntityMention wikipediaTitle : wikipediaTitleInEntityChunk) {
                doc.add((IndexableField)new TextField("mentionedpagetitles", this.termNormalizer.normalize(wikipediaTitle.getCoveredText()), Field.Store.NO));
                doc.add((IndexableField)new StoredField("mentionedpagetitles", wikipediaTitle.getCoveredText()));
            }
        }
        if (wikipediaCategoryManager != null) {
            for (String pathElement : path) {
                doc.add((IndexableField)new TextField("molecularbiologypath", this.termNormalizer.normalize(pathElement), Field.Store.NO));
                doc.add((IndexableField)new StoredField("molecularbiologypath", pathElement));
            }
            doc.add((IndexableField)new IntPoint("molecularbiologypathlength", new int[]{path.size()}));
        } else {
            System.out.println("WikiCategoryManager is null!");
        }
        return doc;
    }

    static {
        redirectMap = Collections.emptyMap();
    }
}

