package org.apache.any23.extractor.microdata;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.extractor.microdata.ItemPropValue;
import org.apache.any23.extractor.rdfa.RDFa11Parser;
import org.apache.any23.rdf.RDFUtils;
import org.apache.commons.lang3.StringUtils;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.datatypes.XMLDatatypeUtil;
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.jsoup.parser.Tag;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.TreeWalker;

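/**
 * Extracts HTML Microdata (item scopes and their item properties) from a DOM {@link Document}.
 *
 * <p>Decompiled from {@code org/apache/any23/extractor/microdata/MicrodataParser.class}.</p>
 */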
public class MicrodataParser {
    /** Document being parsed; {@link #deferProperties(String...)} looks up referenced ids in it. */
    private final Document document;

    /** Ids already followed by {@link #deferProperties(String...)}; cleared when the outermost call finishes. */
    private final Set<String> loopDetectorSet = new HashSet<>();

    /** Cache of item scopes already built by {@link #getItemScope(Node)}, keyed by their DOM node. */
    private final Map<Node, ItemScope> itemScopes = new HashMap<>();

    /** Cache of text-content property values computed by {@link #getPropertyValue(Node)}. */
    private final Map<Node, ItemPropValue> itemPropValues = new HashMap<>();

    /** Current nesting depth of {@link #deferProperties(String...)} invocations. */
    private int dereferenceRecursionCounter = 0;

    /** How {@link #manageError(MicrodataParserException)} reacts to an error; collects by default. */
    private ErrorMode errorMode = ErrorMode.FULL_REPORT;

    /** Errors recorded while running in {@link ErrorMode#FULL_REPORT} mode. */
    private final List<MicrodataParserException> errors = new ArrayList<>();

    /** Name of the attribute that marks an element as an item scope. */
    public static final String ITEMSCOPE_ATTRIBUTE = "itemscope";

    /** Name of the attribute that marks an element as an item property. */
    public static final String ITEMPROP_ATTRIBUTE = "itemprop";

    /** Name of the attribute that marks an element as a reverse item property. */
    private static final String REVERSE_ITEMPROP_ATTRIBUTE = "itemprop-reverse";

    /** Lower-case tag names whose property value is read through {@code RDFa11Parser.SRC_ATTRIBUTE}. */
    public static final Set<String> SRC_TAGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("audio", "embed", "frame", "iframe", "img", "source", "track", "video", "input", "layer", "script", "textarea")));

    /** Lower-case tag names whose property value is read through {@code RDFa11Parser.HREF_ATTRIBUTE}. */
    public static final Set<String> HREF_TAGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("a", "area", "link")));

    /** Shared empty array returned by {@link #itemrefIds(Node)} when no ids are present. */
    private static final String[] EMPTY_STRINGS = new String[0];

    /**
     * Strategy applied by the parser when it meets an error.
     *
     * <p>NOTE(review): the decompiler reports that the original declaration was package-private
     * and that the access modifier was widened to {@code public} - confirm against the upstream
     * source before relying on the visibility.</p>
     */
    public enum ErrorMode {
        /** The first error is thrown to the caller as a {@code MicrodataParserException}. */
        STOP_AT_FIRST_ERROR,
        /** Errors are appended to the parser's error list and parsing continues. */
        FULL_REPORT
    }

    public MicrodataParser(Document document) {
        if (document == null) {
            throw new NullPointerException("Document cannot be null.");
        }
        this.document = document;
    }

    /**
     * Finds the nodes carrying the {@value #ITEMSCOPE_ATTRIBUTE} attribute, as located by
     * {@code DomUtils.findAllByAttributeName}.
     *
     * @param node the node handed to the lookup
     * @return the nodes reported by {@code DomUtils.findAllByAttributeName}
     */
    public static List<Node> getItemScopeNodes(Node node) {
        final List<Node> scopeNodes = DomUtils.findAllByAttributeName(node, ITEMSCOPE_ATTRIBUTE);
        return scopeNodes;
    }

    /**
     * Tells whether the node declares the {@value #ITEMSCOPE_ATTRIBUTE} attribute.
     *
     * @param node the node to inspect
     * @return {@code true} when reading the attribute (with a {@code null} default) yields a value
     */
    public static boolean isItemScope(Node node) {
        final String attributeValue = DomUtils.readAttribute(node, ITEMSCOPE_ATTRIBUTE, null);
        return attributeValue != null;
    }

    /**
     * Finds the nodes carrying the {@value #ITEMPROP_ATTRIBUTE} attribute, as located by
     * {@code DomUtils.findAllByAttributeName}.
     *
     * @param node the node handed to the lookup
     * @return the nodes reported by {@code DomUtils.findAllByAttributeName}
     */
    public static List<Node> getItemPropNodes(Node node) {
        final List<Node> propNodes = DomUtils.findAllByAttributeName(node, ITEMPROP_ATTRIBUTE);
        return propNodes;
    }

    /**
     * Tells whether the node declares the {@value #ITEMPROP_ATTRIBUTE} attribute.
     *
     * @param node the node to inspect
     * @return {@code true} when reading the attribute (with a {@code null} default) yields a value
     */
    public static boolean isItemProp(Node node) {
        final String attributeValue = DomUtils.readAttribute(node, ITEMPROP_ATTRIBUTE, null);
        return attributeValue != null;
    }

    /**
     * Checks whether any ancestor of the node (the node itself is not considered) carries the
     * {@value #ITEMSCOPE_ATTRIBUTE} attribute.
     *
     * @param node the node whose ancestors are inspected
     * @return {@code true} if at least one ancestor declares the attribute
     */
    private static boolean isContainedInItemScope(Node node) {
        for (Node ancestor = node.getParentNode(); ancestor != null; ancestor = ancestor.getParentNode()) {
            // Ancestors without an attribute map (getAttributes() == null) cannot match.
            final NamedNodeMap ancestorAttributes = ancestor.getAttributes();
            if (ancestorAttributes == null) {
                continue;
            }
            if (ancestorAttributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the node or any of its ancestors has an {@code id} attribute whose value is
     * contained in the given set.
     *
     * @param node the node at which the upward walk starts (inspected as well)
     * @param set the ids to look for
     * @return {@code true} as soon as a matching {@code id} is met on the way to the root
     */
    private static boolean isContainedInId(Node node, Set<String> set) {
        Node current = node;
        while (true) {
            final String id = DomUtils.readAttribute(current, "id", null);
            final boolean matches = id != null && set.contains(id);
            if (matches) {
                return true;
            }
            current = current.getParentNode();
            if (current == null) {
                return false;
            }
        }
    }

    /**
     * Selects the item scope nodes that are to be treated as top-level items.
     *
     * <p>A scope node is top-level when it has neither an {@value #ITEMPROP_ATTRIBUTE} nor an
     * {@code itemprop-reverse} attribute. A scope node that does have one of them is still kept
     * when it has no item scope ancestor and neither it nor any of its ancestors has an
     * {@code id} that is mentioned in the {@code itemref} attribute of some scope node.</p>
     *
     * @param node the node under which item scopes are searched
     * @return the top-level scope nodes; plain scopes first, then the retained property scopes
     */
    public static List<Node> getTopLevelItemScopeNodes(Node node) {
        final List<Node> allScopes = getItemScopeNodes(node);
        final ArrayList<Node> topLevel = new ArrayList<>();
        final ArrayList<Node> orphanPropertyScopes = new ArrayList<>();

        for (Node scope : allScopes) {
            final boolean isProperty = isItemProp(scope)
                    || DomUtils.readAttribute(scope, REVERSE_ITEMPROP_ATTRIBUTE, null) != null;
            if (!isProperty) {
                topLevel.add(scope);
                continue;
            }
            if (!isContainedInItemScope(scope)) {
                orphanPropertyScopes.add(scope);
            }
        }

        if (orphanPropertyScopes.isEmpty()) {
            return topLevel;
        }

        // Gather every id referenced through an itemref attribute of any scope node.
        final Set<String> referencedIds = new HashSet<>();
        for (Node scope : allScopes) {
            Collections.addAll(referencedIds, itemrefIds(scope));
        }
        for (Node orphan : orphanPropertyScopes) {
            if (!isContainedInId(orphan, referencedIds)) {
                topLevel.add(orphan);
            }
        }
        return topLevel;
    }

    /**
     * Parses every top-level item scope of the document with the requested error mode.
     *
     * @param document the document to parse
     * @param errorMode the error handling strategy given to the parser
     * @return a report holding the detected item scopes and the errors recorded by the parser
     * @throws MicrodataParserException if building an item scope fails and the error is not
     *         absorbed by the error mode
     */
    public static MicrodataParserReport getMicrodata(Document document, ErrorMode errorMode) throws MicrodataParserException {
        final List<Node> topLevelNodes = getTopLevelItemScopeNodes(document);
        final MicrodataParser parser = new MicrodataParser(document);
        parser.setErrorMode(errorMode);

        final ItemScope[] scopes = new ItemScope[topLevelNodes.size()];
        int next = 0;
        for (Node scopeNode : topLevelNodes) {
            scopes[next++] = parser.getItemScope(scopeNode);
        }
        return new MicrodataParserReport(scopes, parser.getErrors());
    }

    /**
     * Parses the document in {@link ErrorMode#FULL_REPORT} mode.
     *
     * @param document the document to parse
     * @return a report holding the detected item scopes and the collected errors
     * @throws IllegalStateException if a {@code MicrodataParserException} escapes the parser
     */
    public static MicrodataParserReport getMicrodata(Document document) {
        final MicrodataParserReport report;
        try {
            report = getMicrodata(document, ErrorMode.FULL_REPORT);
        } catch (MicrodataParserException unexpected) {
            throw new IllegalStateException("Unexpected exception.", unexpected);
        }
        return report;
    }

    /**
     * Parses the document and writes the outcome to the stream as a JSON object with a
     * {@code "result"} array and, when errors were recorded, an {@code "errors"} array.
     *
     * @param document the document to parse
     * @param printStream the stream receiving the JSON text
     */
    public static void getMicrodataAsJSON(Document document, PrintStream printStream) {
        final MicrodataParserReport report = getMicrodata(document);
        final ItemScope[] scopes = report.getDetectedItemScopes();
        final MicrodataParserException[] reportedErrors = report.getErrors();

        printStream.print("{ ");
        printStream.print("\"result\" : [");
        int scopeIndex = 0;
        for (ItemScope scope : scopes) {
            if (scopeIndex++ > 0) {
                printStream.print(", ");
            }
            printStream.print(scope.toJSON());
        }
        printStream.print("] ");

        final boolean hasErrors = reportedErrors != null && reportedErrors.length > 0;
        if (hasErrors) {
            printStream.print(", ");
            printStream.print("\"errors\" : [");
            int errorIndex = 0;
            for (MicrodataParserException error : reportedErrors) {
                if (errorIndex++ > 0) {
                    printStream.print(", ");
                }
                printStream.print(error.toJSON());
            }
            printStream.print("] ");
        }
        printStream.print("}");
    }

    /**
     * Sets the strategy used when an error is met.
     *
     * @param errorMode the new mode; must not be {@code null}
     * @throws IllegalArgumentException if {@code errorMode} is {@code null}
     */
    public void setErrorMode(ErrorMode errorMode) {
        if (errorMode != null) {
            this.errorMode = errorMode;
            return;
        }
        throw new IllegalArgumentException("errorMode must be not null.");
    }

    /**
     * Returns the strategy currently used when an error is met.
     *
     * @return the active error mode
     */
    public ErrorMode getErrorMode() {
        final ErrorMode current = this.errorMode;
        return current;
    }

    /**
     * Returns a snapshot of the errors recorded so far.
     *
     * @return a new array with the recorded errors; empty when none were recorded
     */
    public MicrodataParserException[] getErrors() {
        // The error list is a final field initialised at declaration, so it is never null here.
        final MicrodataParserException[] snapshot = new MicrodataParserException[this.errors.size()];
        return this.errors.toArray(snapshot);
    }

    /**
     * Computes the value of an item property node.
     *
     * <p>Resolution order: cached value; nested item scope; {@code data}/{@code meter}
     * (typed from the {@code value} attribute); {@code time} (typed from the {@code datetime}
     * attribute); tags in {@link #SRC_TAGS}; tags in {@link #HREF_TAGS}; {@code object};
     * the {@code RDFa11Parser.CONTENT_ATTRIBUTE} attribute; finally the formatted text content.
     * Only the text-content result is stored in the cache.</p>
     *
     * @param node the property node
     * @return the property value
     * @throws MicrodataParserException if building a nested item scope fails
     */
    public ItemPropValue getPropertyValue(Node node) throws MicrodataParserException {
        final ItemPropValue cached = this.itemPropValues.get(node);
        if (cached != null) {
            return cached;
        }
        if (isItemScope(node)) {
            return new ItemPropValue(getItemScope(node), ItemPropValue.Type.Nested);
        }

        final String tagName = node.getNodeName().toLowerCase(Locale.ROOT);
        if ("data".equals(tagName) || "meter".equals(tagName)) {
            return numericPropertyValue(value(node, "value"));
        }
        if ("time".equals(tagName)) {
            return temporalPropertyValue(node, value(node, "datetime"));
        }
        if (SRC_TAGS.contains(tagName)) {
            return link(node, RDFa11Parser.SRC_ATTRIBUTE);
        }
        if (HREF_TAGS.contains(tagName)) {
            return link(node, RDFa11Parser.HREF_ATTRIBUTE);
        }
        if ("object".equals(tagName)) {
            return link(node, "data");
        }

        final String contentAttribute = DomUtils.readAttribute(node, RDFa11Parser.CONTENT_ATTRIBUTE, null);
        if (contentAttribute != null) {
            return new ItemPropValue(RDFUtils.literal(contentAttribute, getLanguage(node)));
        }

        final ItemPropValue textValue = new ItemPropValue(RDFUtils.literal(textContent(node), getLanguage(node)));
        this.itemPropValues.put(node, textValue);
        return textValue;
    }

    /** Wraps the text as an integer literal, else a double literal, else a plain literal. */
    private static ItemPropValue numericPropertyValue(String text) {
        if (XMLDatatypeUtil.isValidInteger(text)) {
            return new ItemPropValue(RDFUtils.literal(text, XSD.INTEGER));
        }
        if (XMLDatatypeUtil.isValidDouble(text)) {
            return new ItemPropValue(RDFUtils.literal(text, XSD.DOUBLE));
        }
        return new ItemPropValue(RDFUtils.literal(text));
    }

    /** Wraps the text with the first matching XSD temporal type, else as a language-tagged literal. */
    private static ItemPropValue temporalPropertyValue(Node node, String text) {
        if (XMLDatatypeUtil.isValidDate(text)) {
            return new ItemPropValue(RDFUtils.literal(text, XSD.DATE));
        }
        if (XMLDatatypeUtil.isValidTime(text)) {
            return new ItemPropValue(RDFUtils.literal(text, XSD.TIME));
        }
        if (XMLDatatypeUtil.isValidDateTime(text)) {
            return new ItemPropValue(RDFUtils.literal(text, XSD.DATETIME));
        }
        if (XMLDatatypeUtil.isValidGYearMonth(text)) {
            return new ItemPropValue(RDFUtils.literal(text, XSD.GYEARMONTH));
        }
        if (XMLDatatypeUtil.isValidGYear(text)) {
            return new ItemPropValue(RDFUtils.literal(text, XSD.GYEAR));
        }
        if (XMLDatatypeUtil.isValidDuration(text)) {
            return new ItemPropValue(RDFUtils.literal(text, XSD.DURATION));
        }
        return new ItemPropValue(RDFUtils.literal(text, getLanguage(node)));
    }

    /**
     * Renders the text below the node using {@link #appendFormatted(Node, StringBuilder, boolean)},
     * starting with no pending line break.
     *
     * @param node the node whose text is rendered
     * @return the rendered text
     */
    private static String textContent(Node node) {
        final StringBuilder buffer = new StringBuilder();
        appendFormatted(node, buffer, false);
        final String rendered = buffer.toString();
        return rendered;
    }

    /**
     * Decides whether a newline has to be inserted between already emitted text and the text
     * about to be appended.
     *
     * <p>No newline is needed when the new text reaches a line break before its first
     * non-whitespace character, or when the emitted text is empty, all whitespace, or reaches a
     * line break (scanning backwards) before a non-whitespace character.</p>
     *
     * @param charSequence the text emitted so far
     * @param charSequence2 the text about to be appended
     * @return {@code true} if a newline should separate the two
     */
    private static boolean shouldSeparateWithNewline(CharSequence charSequence, CharSequence charSequence2) {
        // Leading part of the upcoming text: a line break before any visible character wins.
        final int upcomingLength = charSequence2.length();
        for (int pos = 0; pos < upcomingLength; pos++) {
            final char ch = charSequence2.charAt(pos);
            if (ch == '\n' || ch == '\r') {
                return false;
            }
            if (!Character.isWhitespace(ch)) {
                break;
            }
        }
        // Trailing part of the emitted text: a visible character before any line break is required.
        for (int pos = charSequence.length() - 1; pos >= 0; pos--) {
            final char ch = charSequence.charAt(pos);
            if (ch == '\n' || ch == '\r') {
                return false;
            }
            if (!Character.isWhitespace(ch)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Appends the text found at or below the node to the builder, inserting a newline between
     * text runs that are separated by a {@code br} or block-level element.
     *
     * @param node the node to render; only element and text nodes contribute
     * @param sb the builder receiving the text
     * @param z whether a line break is pending before the next text run
     * @return whether a line break is pending after this node has been processed
     */
    private static boolean appendFormatted(Node node, StringBuilder sb, boolean z) {
        final short nodeType = node.getNodeType();

        if (nodeType == Node.TEXT_NODE) {
            final String text = node.getTextContent();
            if (text.isEmpty()) {
                return z;
            }
            if (z && shouldSeparateWithNewline(sb, text)) {
                sb.append('\n');
            }
            sb.append(text);
            // Text was just written, so nothing is pending any more.
            return false;
        }

        if (nodeType != Node.ELEMENT_NODE) {
            return z;
        }

        final String tagName = node.getNodeName().toLowerCase(Locale.ENGLISH);
        final boolean breaksLine = "br".equals(tagName) || Tag.valueOf(tagName).isBlock();
        final NodeList children = node.getChildNodes();
        boolean pending = z || breaksLine;
        final int childCount = children.getLength();
        for (int index = 0; index < childCount; index++) {
            pending = appendFormatted(children.item(index), sb, pending);
        }
        // A line-breaking element also leaves a break pending after its own content.
        return pending || breaksLine;
    }

    /**
     * Reads the value of the {@code RDFa11Parser.CONTENT_ATTRIBUTE} attribute, falling back to
     * the attribute with the given name.
     *
     * @param node the node whose attributes are read
     * @param str the name of the fallback attribute
     * @return the first attribute value found, or {@code null} if the node has no attribute map
     *         or neither attribute is present
     */
    private static String content(Node node, String str) {
        final NamedNodeMap attributeMap = node.getAttributes();
        if (attributeMap == null) {
            return null;
        }
        final String[] candidateNames = {RDFa11Parser.CONTENT_ATTRIBUTE, str};
        for (String candidate : candidateNames) {
            final Node attribute = attributeMap.getNamedItem(candidate);
            if (attribute != null) {
                return attribute.getNodeValue();
            }
        }
        return null;
    }

    /**
     * Returns the stripped value obtained from {@link #content(Node, String)}, or the stripped
     * text content of the node when no such attribute value exists.
     *
     * @param node the node to read
     * @param str the name of the fallback attribute passed to {@link #content(Node, String)}
     * @return the result of {@code StringUtils.stripToEmpty} on the selected text
     */
    private static String value(Node node, String str) {
        String raw = content(node, str);
        if (raw == null) {
            raw = node.getTextContent();
        }
        return StringUtils.stripToEmpty(raw);
    }

    /**
     * Builds a link value from {@link #content(Node, String)}; when no attribute value is found
     * an empty-string literal value is returned instead.
     *
     * @param node the node to read
     * @param str the name of the fallback attribute passed to {@link #content(Node, String)}
     * @return a {@code Link}-typed value, or an empty literal value
     */
    private static ItemPropValue link(Node node, String str) {
        final String target = content(node, str);
        if (target != null) {
            return new ItemPropValue(target, ItemPropValue.Type.Link);
        }
        return new ItemPropValue(RDFUtils.literal(""));
    }

    /**
     * Finds the language declared for the node by walking from the node up to the root and
     * returning the first non-blank {@code RDFa11Parser.XML_LANG_ATTRIBUTE} or {@code lang}
     * attribute value (checked in that order on each node).
     *
     * @param node the node at which the walk starts
     * @return the trimmed language value, or {@code null} if none is declared
     */
    private static String getLanguage(Node node) {
        final String[] languageAttributes = {RDFa11Parser.XML_LANG_ATTRIBUTE, "lang"};
        Node current = node;
        while (true) {
            for (String attributeName : languageAttributes) {
                final String declared = DomUtils.readAttribute(current, attributeName, null);
                if (StringUtils.isNotBlank(declared)) {
                    return declared.trim();
                }
            }
            current = current.getParentNode();
            if (current == null) {
                return null;
            }
        }
    }

    /**
     * Collects the item properties declared at or below a node.
     *
     * <p>Descendant elements carrying {@value #ITEMPROP_ATTRIBUTE} or {@code itemprop-reverse}
     * are gathered in document order; the subtree of a descendant that carries
     * {@value #ITEMSCOPE_ATTRIBUTE} is not entered. Each whitespace-separated name in the
     * attribute value yields one {@code ItemProp}.</p>
     *
     * @param node the element whose properties are collected
     * @param z when {@code true} the node itself is not examined and its descendants are always
     *        traversed; when {@code false} the node itself may contribute a property and, if it
     *        is an item scope, its descendants are not traversed
     * @return the properties found, in discovery order
     * @throws MicrodataParserException if an error is met and the error mode does not absorb it
     */
    public List<ItemProp> getItemProps(final Node node, boolean z) throws MicrodataParserException {
        final Set<Node> propertyNodes = new LinkedHashSet<>();
        boolean nodeIsScope = false;
        if (!z) {
            NamedNodeMap attributes = node.getAttributes();
            if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null
                    || attributes.getNamedItem(REVERSE_ITEMPROP_ATTRIBUTE) != null) {
                propertyNodes.add(node);
            }
            if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
                nodeIsScope = true;
            }
        }
        if (!nodeIsScope) {
            // The filter does the collecting as a side effect of the traversal.
            final NodeFilter collector = new NodeFilter() {
                @Override
                public short acceptNode(Node candidate) {
                    if (candidate.getNodeType() != Node.ELEMENT_NODE) {
                        return NodeFilter.FILTER_ACCEPT;
                    }
                    NamedNodeMap candidateAttributes = candidate.getAttributes();
                    boolean declaresProperty =
                            candidateAttributes.getNamedItem(MicrodataParser.ITEMPROP_ATTRIBUTE) != null
                            || candidateAttributes.getNamedItem(MicrodataParser.REVERSE_ITEMPROP_ATTRIBUTE) != null;
                    if (declaresProperty && node != candidate) {
                        propertyNodes.add(candidate);
                    }
                    // Rejecting a nested item scope keeps the walker out of its subtree.
                    return candidateAttributes.getNamedItem(MicrodataParser.ITEMSCOPE_ATTRIBUTE) != null
                            ? NodeFilter.FILTER_REJECT
                            : NodeFilter.FILTER_ACCEPT;
                }
            };
            // createTreeWalker is declared by DocumentTraversal, not Document, hence the cast.
            // The walker must be created once: building it inside the loop condition would
            // restart the traversal from the root on every iteration and never terminate.
            final TreeWalker walker = ((DocumentTraversal) node.getOwnerDocument())
                    .createTreeWalker(node, NodeFilter.SHOW_ELEMENT, collector, false);
            while (walker.nextNode() != null) {
                // Nothing to do here; see the filter above.
            }
        }

        final List<ItemProp> result = new ArrayList<>();
        for (Node propertyNode : propertyNodes) {
            String itemprop = DomUtils.readAttribute(propertyNode, ITEMPROP_ATTRIBUTE, null);
            String reverseItemprop = DomUtils.readAttribute(propertyNode, REVERSE_ITEMPROP_ATTRIBUTE, null);
            boolean hasForward = StringUtils.isNotBlank(itemprop);
            boolean hasReverse = StringUtils.isNotBlank(reverseItemprop);
            if (!hasForward && !hasReverse) {
                manageError(new MicrodataParserException("invalid property name '" + itemprop + "'", propertyNode));
                continue;
            }
            try {
                ItemPropValue propertyValue = getPropertyValue(propertyNode);
                if (hasForward) {
                    for (String name : itemprop.trim().split("\\s+")) {
                        result.add(new ItemProp(DomUtils.getXPathForNode(propertyNode), name, propertyValue, false));
                    }
                }
                if (hasReverse) {
                    if (propertyValue.literal != null) {
                        manageError(new MicrodataParserException("itemprop-reverse cannot point to a literal", propertyNode));
                    } else {
                        for (String name : reverseItemprop.trim().split("\\s+")) {
                            result.add(new ItemProp(DomUtils.getXPathForNode(propertyNode), name, propertyValue, true));
                        }
                    }
                }
            } catch (MicrodataParserException e) {
                manageError(e);
            }
        }
        return result;
    }

    /**
     * Resolves the given element ids against the document and collects the item properties
     * found at each referenced element.
     *
     * <p>Every id followed is remembered until the outermost invocation ends; meeting an id
     * again in the meantime is reported as a loop. An unknown id is reported through
     * {@link #manageError(MicrodataParserException)} and skipped. A parser exception raised
     * while resolving is reported through {@code manageError} by the outermost invocation and
     * rethrown by nested ones.</p>
     *
     * @param strArr the element ids to dereference
     * @return the properties collected from the referenced elements
     * @throws MicrodataParserException if an error is met and not absorbed by the error mode
     */
    public ItemProp[] deferProperties(String... strArr) throws MicrodataParserException {
        final Document doc = this.document;
        this.dereferenceRecursionCounter++;
        final List<ItemProp> collected = new ArrayList<>();
        try {
            for (String ref : strArr) {
                // Set.add returns false when the id has already been followed.
                if (!this.loopDetectorSet.add(ref)) {
                    throw new MicrodataParserException(String.format(Locale.ROOT, "Loop detected with depth %d while dereferencing itemProp '%s' .", this.dereferenceRecursionCounter - 1, ref), null);
                }
                final Element referenced = doc.getElementById(ref);
                if (referenced != null) {
                    collected.addAll(getItemProps(referenced, false));
                } else {
                    manageError(new MicrodataParserException(String.format(Locale.ROOT, "Unknown itemProp id '%s'", ref), null));
                }
            }
        } catch (MicrodataParserException failure) {
            final boolean outermostCall = this.dereferenceRecursionCounter == 1;
            if (!outermostCall) {
                throw failure;
            }
            manageError(failure);
        } finally {
            // Leaving this level; the loop detector is reset once the outermost call is done.
            this.dereferenceRecursionCounter--;
            if (this.dereferenceRecursionCounter == 0) {
                this.loopDetectorSet.clear();
            }
        }
        return collected.toArray(new ItemProp[collected.size()]);
    }

    /**
     * Splits the {@code itemref} attribute of the node into its whitespace-separated ids.
     *
     * @param node the node whose {@code itemref} attribute is read
     * @return the ids, or the shared empty array when the attribute is missing or blank
     */
    private static String[] itemrefIds(Node node) {
        final String itemref = DomUtils.readAttribute(node, "itemref", null);
        if (StringUtils.isBlank(itemref)) {
            return EMPTY_STRINGS;
        }
        return itemref.trim().split("\\s+");
    }

    /**
     * Builds (or returns the cached) item scope for a node.
     *
     * <p>The scope gathers the properties found below the node, the properties reached through
     * its {@code itemref} ids (a deferred property equal to one already present is reported as
     * a duplicate), and the IRIs parsed from its {@code itemtype} attribute.</p>
     *
     * @param node the item scope node
     * @return the item scope for the node
     * @throws MicrodataParserException if an error is met and not absorbed by the error mode;
     *         the exception's error node is set to {@code node} before it is rethrown
     */
    public ItemScope getItemScope(Node node) throws MicrodataParserException {
        final ItemScope cached = this.itemScopes.get(node);
        if (cached != null) {
            return cached;
        }
        final String id = DomUtils.readAttribute(node, "id", null);
        final String itemType = DomUtils.readAttribute(node, "itemtype", null);
        final String itemId = DomUtils.readAttribute(node, "itemid", null);
        final List<ItemProp> itemProps = getItemProps(node, true);
        final String[] refs = itemrefIds(node);
        try {
            for (ItemProp deferred : deferProperties(refs)) {
                if (itemProps.contains(deferred)) {
                    manageError(new MicrodataParserException(String.format(Locale.ROOT, "Duplicated deferred itemProp '%s'.", deferred.getName()), node));
                } else {
                    itemProps.add(deferred);
                }
            }

            // Declared as List (not ArrayList) so the immutable empty list can be assigned.
            final List<IRI> types;
            if (itemType == null) {
                types = Collections.emptyList();
            } else {
                types = new ArrayList<>();
                // True while the previous token was added, i.e. while a failing token may still
                // be the continuation of an IRI that contains a space.
                boolean canConcatWithPrevious = false;
                for (String token : itemType.trim().split("\\s+")) {
                    try {
                        canConcatWithPrevious = types.addAll(ItemScope.stringToSingletonIRI(token));
                    } catch (RuntimeException e) {
                        if (canConcatWithPrevious) {
                            final int lastIndex = types.size() - 1;
                            try {
                                // Retry with the previous IRI and this token joined by a space.
                                List<IRI> joined = ItemScope.stringToSingletonIRI(types.get(lastIndex).stringValue() + " " + token);
                                types.remove(lastIndex);
                                canConcatWithPrevious = types.addAll(joined);
                            } catch (RuntimeException ignored) {
                                // The retry failed as well; report the failure of the token itself.
                                manageError(new MicrodataParserException(e.getMessage(), node));
                                canConcatWithPrevious = false;
                            }
                        } else {
                            manageError(new MicrodataParserException(e.getMessage(), node));
                        }
                    }
                }
            }

            final ItemScope scope = new ItemScope(DomUtils.getXPathForNode(node), itemProps.toArray(new ItemProp[itemProps.size()]), id, refs, types, itemId);
            this.itemScopes.put(node, scope);
            return scope;
        } catch (MicrodataParserException e) {
            e.setErrorNode(node);
            throw e;
        }
    }

    /**
     * Handles an error according to the current error mode: recorded in
     * {@link ErrorMode#FULL_REPORT}, thrown in {@link ErrorMode#STOP_AT_FIRST_ERROR}.
     *
     * @param microdataParserException the error to handle
     * @throws MicrodataParserException the given error, when the mode is
     *         {@link ErrorMode#STOP_AT_FIRST_ERROR}
     * @throws IllegalStateException if the current mode is not one of the handled modes
     */
    private void manageError(MicrodataParserException microdataParserException) throws MicrodataParserException {
        switch (this.errorMode) {
            case STOP_AT_FIRST_ERROR:
                throw microdataParserException;
            case FULL_REPORT:
                this.errors.add(microdataParserException);
                break;
            default:
                throw new IllegalStateException("Unsupported mode " + this.errorMode);
        }
    }
}
