package edu.pitt.dbmi.nlp.noble.coder;

import edu.pitt.dbmi.nlp.noble.coder.model.Document;
import edu.pitt.dbmi.nlp.noble.coder.model.Mention;
import edu.pitt.dbmi.nlp.noble.coder.model.Processor;
import edu.pitt.dbmi.nlp.noble.coder.model.Sentence;
import edu.pitt.dbmi.nlp.noble.coder.processor.DocumentProcessor;
import edu.pitt.dbmi.nlp.noble.terminology.Terminology;
import edu.pitt.dbmi.nlp.noble.terminology.TerminologyError;
import edu.pitt.dbmi.nlp.noble.terminology.TerminologyException;
import edu.pitt.dbmi.nlp.noble.terminology.impl.NobleCoderTerminology;
import edu.pitt.dbmi.nlp.noble.tools.AcronymDetector;
import edu.pitt.dbmi.nlp.noble.tools.ConText;
import edu.pitt.dbmi.nlp.noble.tools.TextTools;
import edu.pitt.dbmi.nlp.noble.util.DeIDUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.List;

/* loaded from: input_file:edu/pitt/dbmi/nlp/noble/coder/NobleCoder.class */
/**
 * Document-level coder: runs a {@link Terminology} over every sentence of a
 * {@link Document}, then optionally applies acronym handling and ConText
 * processing to each sentence.
 *
 * <p>Which sentences are skipped, and whether de-identification tags are
 * stripped from the text before coding, is controlled by a bitmask built from
 * {@link #FILTER_DEID}, {@link #FILTER_HEADER} and {@link #FILTER_WORKSHEET}
 * (see {@link #setProcessFilter(int)}).
 *
 * <p>Instances keep mutable state (lazily created helpers, last process time)
 * without any synchronization.
 */
public class NobleCoder implements Processor<Document> {
    /** Filter flag: skip sentences recognised as DeID headers and strip DeID tags from sentence text. */
    public static final int FILTER_DEID = 1;
    /** Filter flag: skip sentences whose type is {@code Sentence.TYPE_HEADER}. */
    public static final int FILTER_HEADER = 2;
    /** Filter flag: skip sentences whose type is {@code Sentence.TYPE_WORKSHEET}. */
    public static final int FILTER_WORKSHEET = 4;

    private Terminology terminology;
    private Processor<Document> documentProcessor;
    private AcronymDetector acronymDetector;
    private ConText conText;
    private boolean handleAcronyms = true;
    private boolean handleNegation = true;
    private int processFilter = FILTER_DEID | FILTER_HEADER;
    /** Duration in milliseconds of the last completed {@link #process(Document)} call. */
    private long time;

    /**
     * Command-line driver: codes every {@code *.txt} file in a directory with the
     * "NCI_Thesaurus" terminology and writes a {@code <name>.processed} report next
     * to each input file.
     *
     * @param strArr optional; when present, the first element is the input directory,
     *               otherwise the original built-in sample directory is used
     * @throws Exception if the terminology cannot be loaded, the directory cannot be
     *                   listed, or a file cannot be processed or written
     */
    public static void main(String[] strArr) throws Exception {
        String inputPath = (strArr != null && strArr.length > 0)
                ? strArr[0]
                : "/home/tseytlin/Data/DeepPhe/Melanoma/sample/deid/";

        NobleCoder nobleCoder = new NobleCoder("NCI_Thesaurus");
        nobleCoder.setProcessFilter(nobleCoder.getProcessFilter() | FILTER_WORKSHEET);
        ((NobleCoderTerminology) nobleCoder.getTerminology()).setSelectBestCandidate(true);

        // listFiles() returns null when the path is not a directory or cannot be read
        File[] files = new File(inputPath).listFiles();
        if (files == null) {
            throw new FileNotFoundException("Not a readable directory: " + inputPath);
        }
        for (File file : files) {
            if (!file.getName().endsWith(".txt")) {
                continue;
            }
            System.out.print("processing\t" + file.getName() + "\t..\t");
            Document process = nobleCoder.process(file);
            File reportFile = new File(file.getParentFile(), file.getName() + ".processed");
            // try-with-resources so the report is closed even if writing fails part-way
            try (PrintStream printStream = new PrintStream(reportFile)) {
                writeReport(printStream, process, nobleCoder.getProcessTime());
            }
            System.out.println(nobleCoder.getProcessTime() + " ms");
        }
    }

    /**
     * Writes the plain-text report for one coded document: title, properties,
     * every sentence with its mentions, and the processing time.
     */
    private static void writeReport(PrintStream printStream, Document process, long processTime) {
        String separator = "---------------------------------------";
        printStream.println(process.getTitle());
        printStream.println(separator);
        for (String str : process.getProperties().keySet()) {
            printStream.println(str + "\t->\t" + process.getProperties().get(str));
        }
        printStream.println(separator);
        for (Sentence sentence : process.getSentences()) {
            printStream.println("sentence:\t|" + sentence.getOffset() + "|\t" + sentence.getSentenceType() + "|\t" + (sentence.getSection() != null ? sentence.getSection().getTitle() : "none") + "|\t" + sentence + "\t|" + (sentence.getProperties().containsKey("time") ? sentence.getProperties().get("time") : ""));
            for (Mention mention : sentence.getMentions()) {
                printStream.println("\tmention:\t" + mention + " | " + mention.getConcept().getCode() + " | " + mention.getConcept().getName() + " | " + mention.getAnnotations());
            }
        }
        printStream.println(separator);
        printStream.println(processTime + " ms");
    }

    /**
     * Creates a coder from a terminology file: the file's parent directory becomes the
     * {@link NobleCoderTerminology} persistence directory and the file name is used as
     * the terminology name.
     *
     * @param file terminology location
     * @throws IOException if the terminology cannot be loaded
     */
    public NobleCoder(File file) throws IOException {
        NobleCoderTerminology.setPersistenceDirectory(file.getParentFile());
        setTerminology(new NobleCoderTerminology(file.getName()));
    }

    /**
     * Creates a coder backed by a {@link NobleCoderTerminology} with the given name.
     *
     * @param str terminology name
     * @throws IOException if the terminology cannot be loaded
     */
    public NobleCoder(String str) throws IOException {
        setTerminology(new NobleCoderTerminology(str));
    }

    /**
     * Creates a coder backed by an existing terminology.
     *
     * @param terminology terminology used to code sentences
     */
    public NobleCoder(Terminology terminology) {
        setTerminology(terminology);
    }

    /** Creates a coder with no terminology; one must be set before processing. */
    public NobleCoder() {
    }

    /**
     * Returns the processor used to turn raw documents into sentences, creating a
     * default {@link DocumentProcessor} on first use.
     */
    public Processor<Document> getDocumentProcessor() {
        if (this.documentProcessor == null) {
            this.documentProcessor = new DocumentProcessor();
        }
        return this.documentProcessor;
    }

    /** Replaces the processor used to pre-process documents. */
    public void setDocumentProcessor(Processor<Document> processor) {
        this.documentProcessor = processor;
    }

    /** Returns the current filter bitmask (combination of the {@code FILTER_*} flags). */
    public int getProcessFilter() {
        return this.processFilter;
    }

    /**
     * Sets the filter bitmask.
     *
     * @param i bitwise OR of {@link #FILTER_DEID}, {@link #FILTER_HEADER},
     *          {@link #FILTER_WORKSHEET}; {@code 0} disables all filtering
     */
    public void setProcessFilter(int i) {
        this.processFilter = i;
    }

    /**
     * Sets the terminology used for coding and runs acronym setup against its location.
     *
     * @param terminology terminology to use
     * @throws TerminologyError if acronym setup fails with an I/O error
     */
    public void setTerminology(Terminology terminology) {
        this.terminology = terminology;
        try {
            setupAcronyms(new File(terminology.getLocation()));
        } catch (IOException e) {
            throw new TerminologyError("Unable to find terminology location", e);
        }
    }

    /** Returns the acronym detector, creating it on first use. */
    public AcronymDetector getAcronymDetector() {
        if (this.acronymDetector == null) {
            this.acronymDetector = new AcronymDetector();
        }
        return this.acronymDetector;
    }

    /**
     * Hook for acronym setup from the terminology location; currently a no-op.
     *
     * @param file terminology location
     * @throws IOException declared for implementations that read from {@code file}
     */
    private void setupAcronyms(File file) throws IOException {
    }

    /** Returns the terminology used for coding, or {@code null} if none was set. */
    public Terminology getTerminology() {
        return this.terminology;
    }

    /** Returns whether the acronym detector is run on each processed sentence. */
    public boolean isAcronymExpansion() {
        return this.handleAcronyms;
    }

    /** Enables or disables running the acronym detector on each processed sentence. */
    public void setAcronymExpansion(boolean z) {
        this.handleAcronyms = z;
    }

    /** Returns whether ConText is run on each processed sentence. */
    public boolean isContextDetection() {
        return this.handleNegation;
    }

    /** Enables or disables running ConText on each processed sentence. */
    public void setContextDetection(boolean z) {
        this.handleNegation = z;
    }

    /**
     * Reads a text file, pre-processes it with the document processor and codes it.
     * The document's location is set to the file's absolute path and its title to the
     * file name.
     *
     * @param file text file to process
     * @return the coded document
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading the file fails
     * @throws TerminologyException if coding fails
     */
    public Document process(File file) throws FileNotFoundException, IOException, TerminologyException {
        String text;
        // close the stream even when reading fails; closing twice is harmless
        try (FileInputStream input = new FileInputStream(file)) {
            text = TextTools.getText(input);
        }
        Document document = new Document(text);
        document.setLocation(file.getAbsolutePath());
        document.setTitle(file.getName());
        return process(getDocumentProcessor().process(document));
    }

    /**
     * Codes every non-filtered sentence of a document. Documents still marked
     * {@code Document.STATUS_UNPROCESSED} are first run through the document
     * processor. Collected acronyms are cleared at the start of each call.
     *
     * @param document document to code
     * @return the coded document, with status {@code Document.STATUS_CODED} and this
     *         coder's elapsed time recorded under the class's simple name
     * @throws TerminologyException if coding a sentence fails
     */
    @Override // edu.pitt.dbmi.nlp.noble.coder.model.Processor
    public Document process(Document document) throws TerminologyException {
        // keep the start in a local so getProcessTime() never exposes a raw timestamp
        long start = System.currentTimeMillis();
        getAcronymDetector().clearAcronyms();
        if (Document.STATUS_UNPROCESSED.equals(document.getDocumentStatus())) {
            document = getDocumentProcessor().process(document);
        }
        for (Sentence sentence : document.getSentences()) {
            if (!filterSentence(sentence)) {
                process(sentence);
            }
        }
        document.setDocumentStatus(Document.STATUS_CODED);
        this.time = System.currentTimeMillis() - start;
        document.getProcessTime().put(getClass().getSimpleName(), Long.valueOf(this.time));
        return document;
    }

    /**
     * Codes a single piece of text as one sentence.
     *
     * @param str text to code
     * @return mentions found in the text
     * @throws TerminologyException if coding fails
     */
    public List<Mention> process(String str) throws TerminologyException {
        return process(new Sentence(str)).getMentions();
    }

    /**
     * Codes one sentence: the terminology, then (if enabled) the acronym detector and
     * ConText are run against the filtered text; the original text is restored
     * afterwards, including when a processing stage throws.
     *
     * @param sentence sentence to code
     * @return the same sentence instance, with this coder's elapsed time recorded
     *         under the class's simple name
     * @throws TerminologyException if the terminology fails to process the sentence
     */
    public Sentence process(Sentence sentence) throws TerminologyException {
        long start = System.currentTimeMillis();
        String originalText = sentence.getText();
        sentence.setText(filterText(originalText));
        try {
            getTerminology().process(sentence);
            if (this.handleAcronyms) {
                getAcronymDetector().process(sentence);
            }
            if (this.handleNegation) {
                getConText().process(sentence);
            }
        } finally {
            // never leave the caller's sentence holding the filtered text
            sentence.setText(originalText);
        }
        sentence.getProcessTime().put(getClass().getSimpleName(), Long.valueOf(System.currentTimeMillis() - start));
        return sentence;
    }

    /** Returns the ConText processor, creating it on first use. */
    public ConText getConText() {
        if (this.conText == null) {
            this.conText = new ConText();
        }
        return this.conText;
    }

    /** Replaces the ConText processor. */
    public void setConText(ConText conText) {
        this.conText = conText;
    }

    /**
     * Decides whether a sentence is skipped during document coding.
     *
     * @param sentence sentence to test
     * @return {@code true} for empty sentences, for header/worksheet sentences when the
     *         matching flag is set, and for DeID headers when {@link #FILTER_DEID} is set
     */
    private boolean filterSentence(Sentence sentence) {
        String text = sentence.getText();
        if (text.length() == 0) {
            return true;
        }
        int filter = getProcessFilter();
        if ((filter & FILTER_HEADER) != 0 && Sentence.TYPE_HEADER.equals(sentence.getSentenceType())) {
            return true;
        }
        if ((filter & FILTER_WORKSHEET) != 0 && Sentence.TYPE_WORKSHEET.equals(sentence.getSentenceType())) {
            return true;
        }
        return (filter & FILTER_DEID) != 0 && DeIDUtils.isDeIDHeader(text);
    }

    /**
     * Returns the text to code: with DeID tags filtered out when {@link #FILTER_DEID}
     * is set, otherwise unchanged.
     */
    private String filterText(String str) {
        return (getProcessFilter() & FILTER_DEID) != 0 ? DeIDUtils.filterDeIDTags(str) : str;
    }

    /**
     * Returns the duration, in milliseconds, of the last completed
     * {@link #process(Document)} call ({@code 0} before the first one).
     */
    @Override // edu.pitt.dbmi.nlp.noble.coder.model.Processor
    public long getProcessTime() {
        return this.time;
    }
}
