package edu.isi.nlp.corenlp;

import com.google.common.annotations.Beta;
import com.google.common.base.Charsets;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.io.CharSource;
import com.google.common.io.Files;
import edu.isi.nlp.corenlp.CoreNLPDocument;
import edu.isi.nlp.corenlp.CoreNLPSentence;
import edu.isi.nlp.parsing.HeadFinder;
import edu.isi.nlp.strings.offsets.OffsetRange;
import edu.isi.nlp.symbols.Symbol;
import edu.isi.nlp.xml.XMLUtils;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Loads Stanford CoreNLP XML output into {@link CoreNLPDocument} objects.
 *
 * <p>The expected layout is a {@code <root>} element with a {@code <document>} child, which holds
 * {@code <sentences>} / {@code <sentence>} elements, each with a {@code <tokens>} list and an
 * optional {@code <parse>}. A document whose top-level element is {@code <document>} itself is
 * also accepted by the DOM-level loader. Tag names are matched case-insensitively.
 *
 * <p>Instances are created through {@link #builder(HeadFinder)}.
 */
@Beta
public final class CoreNLPXMLLoader {
    private final HeadFinder<CoreNLPParseNode> headFinder;
    private final boolean stripFunctionTags;

    /** Builder for {@link CoreNLPXMLLoader}; function tags are stripped by default. */
    public static class CoreNLPXMLLoaderBuilder {
        private final HeadFinder<CoreNLPParseNode> headFinder;
        private boolean stripFunctionTags = true;

        private CoreNLPXMLLoaderBuilder(HeadFinder<CoreNLPParseNode> headFinder) {
            this.headFinder = headFinder;
        }

        /**
         * Not supported: keeping function tags is currently disallowed.
         *
         * @throws UnsupportedOperationException always
         */
        public CoreNLPXMLLoaderBuilder keepFunctionTags() {
            throw new UnsupportedOperationException("I can't let you do that Dave.");
        }

        /**
         * Creates the loader.
         *
         * @throws NullPointerException if the head finder given to the builder was null
         */
        public CoreNLPXMLLoader build() {
            return new CoreNLPXMLLoader(this.headFinder, this.stripFunctionTags);
        }
    }

    private CoreNLPXMLLoader(HeadFinder<CoreNLPParseNode> headFinder, boolean stripFunctionTags) {
        this.headFinder = Preconditions.checkNotNull(headFinder);
        this.stripFunctionTags = stripFunctionTags;
    }

    /**
     * Loads a document from a UTF-8 encoded file.
     *
     * @param file the CoreNLP XML file to read; must not be null
     * @return the parsed document
     * @throws IOException if reading or parsing fails for any reason; the original failure
     *     (including unchecked exceptions) is attached as the cause and the file path is included
     *     in the message
     */
    public CoreNLPDocument loadFrom(File file) throws IOException {
        Preconditions.checkNotNull(file);
        try {
            return loadFrom(Files.asCharSource(file, Charsets.UTF_8));
        } catch (Exception e) {
            // Deliberately broad: every failure is reported together with the offending file path.
            throw new IOException(String.format("Error loading StanfordXML document %s", file.getAbsolutePath()), e);
        }
    }

    /**
     * Loads a document from a character source.
     *
     * @param charSource the source of the CoreNLP XML text; must not be null
     * @return the parsed document
     * @throws IOException if the source cannot be read or contains no {@code <root>} tag
     * @throws RuntimeException if the XML is malformed or does not have the expected structure
     */
    public CoreNLPDocument loadFrom(CharSource charSource) throws IOException {
        Preconditions.checkNotNull(charSource);
        return loadFromString(charSource.read());
    }

    /**
     * Parses the XML text into a DOM and converts it to a document.
     *
     * @throws IOException if the text does not contain the literal {@code <root>} tag
     * @throws RuntimeException if the parser cannot be configured or the XML is malformed
     */
    private CoreNLPDocument loadFromString(String str) throws IOException {
        if (!str.contains("<root>")) {
            throw new IOException("could not find a root element for this document");
        }
        final InputSource inputSource = new InputSource(new StringReader(str));
        try {
            final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(true);
            // The input comes from files we do not control, so refuse to resolve external
            // entities (XXE): they could otherwise read local files or trigger network requests.
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            return loadFrom(factory.newDocumentBuilder().parse(inputSource));
        } catch (ParserConfigurationException | SAXException e) {
            throw new RuntimeException("Error parsing xml", e);
        }
    }

    /**
     * Locates the {@code <document>} element of a parsed DOM and converts it.
     *
     * @throws RuntimeException if the top-level element is neither {@code root} nor
     *     {@code document}, or if a {@code root} has no direct {@code document} child
     */
    private CoreNLPDocument loadFrom(Document document) {
        final Element root = document.getDocumentElement();
        final String rootTag = root.getTagName();
        if (rootTag.equalsIgnoreCase("root")) {
            final Optional<Element> documentChild = XMLUtils.directChild(root, "document");
            if (documentChild.isPresent()) {
                return loadFrom(documentChild.get());
            }
            throw new RuntimeException("If a StanfordXML has a root tag at the top-level, it must have a document element immediately below it");
        }
        // "StanfordDocument" is still routed to toDocument, whose tag precondition rejects it
        // with an IllegalArgumentException exactly as before; only "document" is newly accepted.
        if (rootTag.equalsIgnoreCase("document") || rootTag.equalsIgnoreCase("StanfordDocument")) {
            return loadFrom(root);
        }
        throw new RuntimeException("StanfordXML should have a root of root or document");
    }

    /** Converts a {@code <document>} element; thin overload kept alongside the other loadFrom variants. */
    private CoreNLPDocument loadFrom(Element element) {
        return toDocument(element);
    }

    /**
     * Builds a document from a {@code <document>} element, reading each {@code <sentences>} child.
     *
     * @throws IllegalArgumentException if the element's tag is not {@code document}
     */
    private CoreNLPDocument toDocument(Element element) {
        Preconditions.checkArgument(element.getTagName().equalsIgnoreCase("Document"));
        final CoreNLPDocument.CoreNLPDocumentBuilder builder = CoreNLPDocument.builder();
        for (final Element child : childElements(element)) {
            if (child.getTagName().equalsIgnoreCase("sentences")) {
                builder.withSentences(toSentences(child));
            }
        }
        return builder.build();
    }

    /** Converts every {@code <sentence>} child of a {@code <sentences>} element, in document order. */
    private ImmutableList<CoreNLPSentence> toSentences(Element element) {
        final ImmutableList.Builder<CoreNLPSentence> sentences = ImmutableList.builder();
        for (final Element child : childElements(element)) {
            if (child.getTagName().equalsIgnoreCase("sentence")) {
                sentences.add(toSentence(child));
            }
        }
        return sentences.build();
    }

    /**
     * Converts a {@code <sentence>} element, reading its {@code <tokens>} and optional
     * {@code <parse>} children.
     *
     * @throws RuntimeException if anything goes wrong (including a duplicate {@code <tokens>}
     *     element); the message carries the sentence's {@code id} attribute and the cause
     */
    private CoreNLPSentence toSentence(Element element) {
        final CoreNLPSentence.StanfordSentenceBuilder builder = CoreNLPSentence.builder();
        final String sentenceId = element.getAttribute("id");
        ImmutableList<CoreNLPToken> tokens = null;
        try {
            for (final Element child : childElements(element)) {
                final String tag = child.getTagName();
                if (tag.equalsIgnoreCase("tokens")) {
                    if (tokens != null) {
                        throw new RuntimeException("Can't have tokens twice!");
                    }
                    tokens = toTokens(child);
                    builder.withTokens(tokens);
                } else if (tag.equalsIgnoreCase("parse")) {
                    // NOTE(review): tokens is still null here if <parse> precedes <tokens>;
                    // presumably the parse factory rejects that - confirm.
                    builder.withParse(Optional.of(CoreNLPConstituencyParse.create(
                        this.headFinder, tokens, child.getTextContent(), this.stripFunctionTags)));
                }
            }
            return builder.build();
        } catch (Exception e) {
            throw new RuntimeException("Error parsing CoreNLP document in sentence " + sentenceId, e);
        }
    }

    /** Converts every {@code <token>} child of a {@code <tokens>} element, in document order. */
    private ImmutableList<CoreNLPToken> toTokens(Element element) {
        final ImmutableList.Builder<CoreNLPToken> tokens = ImmutableList.builder();
        for (final Element child : childElements(element)) {
            if (child.getTagName().equalsIgnoreCase("token")) {
                tokens.add(toToken(child));
            }
        }
        return tokens.build();
    }

    /**
     * Converts a {@code <token>} element using its {@code word}, {@code POS},
     * {@code CharacterOffsetBegin} and {@code CharacterOffsetEnd} children.
     *
     * <p>Missing children leave the word / POS as {@code null} and the offsets as {@code -1}.
     *
     * @throws NumberFormatException if an offset is not an integer
     */
    private CoreNLPToken toToken(Element element) {
        String word = null;
        String pos = null;
        int beginOffset = -1;
        int endOffset = -1;
        for (final Element child : childElements(element)) {
            final String tag = child.getTagName();
            if (tag.equalsIgnoreCase("word")) {
                word = child.getTextContent();
            } else if (tag.equalsIgnoreCase("CharacterOffsetBegin")) {
                // Trim so whitespace around the number (e.g. from pretty-printing) is tolerated.
                beginOffset = Integer.parseInt(child.getTextContent().trim());
            } else if (tag.equalsIgnoreCase("CharacterOffsetEnd")) {
                // The stored end is one less than the XML value - presumably converting an
                // exclusive end offset to an inclusive one; confirm against OffsetRange.
                endOffset = Integer.parseInt(child.getTextContent().trim()) - 1;
            } else if (tag.equalsIgnoreCase("POS")) {
                pos = child.getTextContent();
            }
        }
        return CoreNLPToken.create(Symbol.from(pos), word, OffsetRange.charOffsetRange(beginOffset, endOffset));
    }

    /** Returns the direct children of {@code parent} that are elements, in document order. */
    private static ImmutableList<Element> childElements(Element parent) {
        final ImmutableList.Builder<Element> children = ImmutableList.builder();
        for (Node node = parent.getFirstChild(); node != null; node = node.getNextSibling()) {
            if (node instanceof Element) {
                children.add((Element) node);
            }
        }
        return children.build();
    }

    /**
     * Starts building a loader.
     *
     * @param headFinder the head finder passed on to constituency-parse construction
     * @return a new builder
     */
    public static CoreNLPXMLLoaderBuilder builder(HeadFinder<CoreNLPParseNode> headFinder) {
        return new CoreNLPXMLLoaderBuilder(headFinder);
    }
}
