/*
 * Decompiled with CFR 0.152.
 */
package de.datexis.sector.reader;

import de.datexis.common.Resource;
import de.datexis.model.Annotation;
import de.datexis.model.Dataset;
import de.datexis.model.Document;
import de.datexis.model.Sentence;
import de.datexis.preprocess.DocumentFactory;
import de.datexis.reader.RawTextDatasetReader;
import de.datexis.sector.model.SectionAnnotation;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.io.LineIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reader for text-segmentation files in which segments are delimited by lines
 * equal to {@link #SEGMENT_SPLIT}.
 *
 * <p>{@link #readDocumentFromFile(Resource)} turns one such file into a
 * {@link Document} carrying one GOLD {@link SectionAnnotation} per segment.
 * {@link #readC99Result(Dataset, Resource)} reads {@code *.pred} files that use
 * the same delimiter and attaches PRED {@link SectionAnnotation}s to documents
 * that are already part of a {@link Dataset}.
 */
public class ChoiDatasetReader
extends RawTextDatasetReader {
    protected static final Logger log = LoggerFactory.getLogger(ChoiDatasetReader.class);

    /** A line consisting of exactly this text separates two segments. */
    protected static final String SEGMENT_SPLIT = "==========";

    /**
     * File name of a prediction file: {@code <docId>.pred} or {@code <docId>.ref.pred}.
     * Matched against the file name only (not the full path) so that the path
     * separator of the platform does not matter. Group 1 is the numeric document id.
     */
    private static final Pattern PRED_FILE_NAME = Pattern.compile("^(\\d+)(\\.ref)?\\.pred$");

    /**
     * Reads a single segmented text file into a {@link Document}.
     *
     * <p>Every non-blank line becomes one sentence; a line that does not end in
     * {@code .}, {@code !} or {@code ?} gets a {@code .} appended first. Each run
     * of lines between two {@link #SEGMENT_SPLIT} lines (or between a separator
     * and the start/end of the file) becomes one section with a GOLD
     * {@link SectionAnnotation}. Runs that contain no text are ignored.
     *
     * @param file the resource to read; its file name becomes the document id
     * @return the document with type {@code "seg"} and one annotation per non-empty segment
     * @throws RuntimeException if opening or closing the resource fails with an
     *         {@link IOException}; the {@code IOException} is attached as the cause
     */
    public Document readDocumentFromFile(Resource file) {
        try (InputStream in = file.getInputStream()) {
            // A decoder obtained via newDecoder() reports malformed input instead of
            // silently replacing it, so keep using it rather than the Charset overload.
            CharsetDecoder utf8 = StandardCharsets.UTF_8.newDecoder();
            LineIterator it = new LineIterator(new BufferedReader(new InputStreamReader(in, utf8)));
            Document doc = new Document();
            doc.setId(file.getFileName());
            doc.setSource((Object)file.toString());
            doc.setType("seg");
            StringBuilder text = new StringBuilder();
            while (it.hasNext()) {
                String line = it.nextLine();
                if (line.equals(SEGMENT_SPLIT)) {
                    // addToDocument ignores blank text, so consecutive separators are harmless.
                    this.addToDocument(text.toString(), doc);
                    text = new StringBuilder();
                    continue;
                }
                if (line.trim().isEmpty()) {
                    continue;
                }
                if (!(line.endsWith(".") || line.endsWith("!") || line.endsWith("?"))) {
                    line = line + ".";
                }
                // One input line per "\n"-terminated paragraph; addToDocument splits on "\n".
                text.append(line).append("\n");
            }
            // Flush the last segment in case the file does not end with a separator line.
            this.addToDocument(text.toString(), doc);
            return doc;
        }
        catch (IOException ex) {
            log.error(ex.toString(), ex);
            // Chain the IOException itself (not only its cause) so the stack trace is kept.
            throw new RuntimeException(ex.toString(), ex);
        }
    }

    /**
     * Appends one segment to {@code doc} and annotates it as a GOLD section.
     *
     * <p>The text is split on {@code "\n"}; every non-blank piece is tokenized on
     * its own and added as exactly one sentence. The section label is the begin
     * offset of the section (as a decimal string), read after the section has been
     * appended to {@code doc}.
     *
     * @param text the segment text, one sentence per line; blank text is ignored
     * @param doc the document that receives the sentences and the annotation
     */
    private void addToDocument(String text, Document doc) {
        if (text.trim().length() == 0) {
            return;
        }
        Document section = new Document();
        for (String paragraph : text.split("\n")) {
            if (paragraph.trim().isEmpty()) {
                continue;
            }
            Document temp = DocumentFactory.fromText(paragraph.trim() + "\n", DocumentFactory.Newlines.KEEP);
            // Re-pack all tokens into a single sentence so one input line stays one sentence.
            section.addSentence(DocumentFactory.createSentenceFromTokens((List)temp.getTokens()));
        }
        doc.append(section);
        // NOTE(review): offsets are read after append — presumably append() shifts the
        // section into the coordinates of doc; confirm against Document.append.
        String sectionHeading = Integer.toString(section.getBegin());
        SectionAnnotation sectionAnn = new SectionAnnotation(Annotation.Source.GOLD, doc.getType(), sectionHeading);
        sectionAnn.setSectionLabel(sectionHeading);
        sectionAnn.setBegin(section.getBegin());
        sectionAnn.setEnd(section.getEnd());
        doc.addAnnotation((Annotation)sectionAnn);
    }

    /**
     * Reads all prediction files below {@code path} and attaches the predicted
     * segments as PRED {@link SectionAnnotation}s to the matching documents of
     * {@code dataset}.
     *
     * <p>Regular files whose name ends in {@code .pred} are visited in sorted path
     * order. The document id is taken from the file name ({@code <id>.pred} or
     * {@code <id>.ref.pred}); files whose name does not have that form are skipped
     * with a warning.
     *
     * @param dataset the dataset holding the documents to annotate
     * @param path the directory (or file) to search for prediction files
     * @throws IOException if walking the directory or reading a prediction file fails
     */
    public static void readC99Result(Dataset dataset, Resource path) throws IOException {
        // Files.walk holds open directory handles until the stream is closed.
        try (Stream<Path> walk = Files.walk(path.getPath())) {
            // An explicit iterator (instead of forEach) lets IOExceptions propagate unchanged.
            Iterator<Path> files = walk
                .filter(p -> Files.isRegularFile(p, LinkOption.NOFOLLOW_LINKS))
                .filter(p -> p.getFileName().toString().endsWith(".pred"))
                .sorted()
                .iterator();
            while (files.hasNext()) {
                Path predFile = files.next();
                String f = predFile.toString();
                Matcher matcher = PRED_FILE_NAME.matcher(predFile.getFileName().toString());
                if (!matcher.matches()) {
                    log.warn("skipping file {}: name does not contain a numeric doc id", f);
                    continue;
                }
                int docId = Integer.parseInt(matcher.group(1));
                log.info("reading doc id {} from file {}", docId, f);
                readC99File(dataset, docId, f);
            }
        }
    }

    /**
     * Reads one prediction file and adds its segments to the document with id {@code docId}.
     *
     * <p>Every non-separator line is paired, in order, with the next sentence of
     * the document; a warning is logged when the texts differ. A segment is added
     * as an annotation when a separator line (or the end of the file) closes it and
     * it contains at least one sentence. Segments opened by the n-th separator line
     * are labelled {@code n}.
     *
     * @param dataset the dataset holding the document
     * @param docId the id of the document the predictions belong to
     * @param f the path of the prediction file
     * @throws IOException if opening or closing the file fails
     */
    private static void readC99File(Dataset dataset, int docId, String f) throws IOException {
        Resource file = Resource.fromFile(f);
        try (InputStream in = file.getInputStream()) {
            CharsetDecoder utf8 = StandardCharsets.UTF_8.newDecoder();
            LineIterator it = new LineIterator(new BufferedReader(new InputStreamReader(in, utf8)));
            int sentenceIndex = 0;
            int segmentCount = 0;
            int length = 0;
            Document doc = (Document)dataset.getDocument(docId).get();
            SectionAnnotation ann = new SectionAnnotation(Annotation.Source.PRED);
            while (it.hasNext()) {
                String line = it.nextLine();
                if (line.equals(SEGMENT_SPLIT)) {
                    ++segmentCount;
                    if (length > 0) {
                        doc.addAnnotation((Annotation)ann);
                    }
                    ann = new SectionAnnotation(Annotation.Source.PRED);
                    ann.setSectionLabel(Integer.toString(segmentCount));
                    length = 0;
                    continue;
                }
                Sentence s = doc.getSentence(sentenceIndex);
                if (length == 0) {
                    ann.setBegin(s.getBegin());
                }
                // Always move the end forward so that one-sentence segments get an end too.
                ann.setEnd(s.getEnd());
                ++length;
                ++sentenceIndex;
                if (!s.getText().trim().equals(line.trim())) {
                    log.warn("docId {} k={} different sentences\n{}\n{}", docId, sentenceIndex, line, s.getText());
                }
            }
            // Keep a trailing segment that is not terminated by a separator line.
            if (length > 0) {
                doc.addAnnotation((Annotation)ann);
            }
        }
    }
}

