package org.apache.any23.extractor.csv;

import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import java.util.StringTokenizer;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.CSV;
import org.apache.commons.csv.CSVParser;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.XMLSchema;

/* loaded from: input_file:WEB-INF/lib/apache-any23-core-1.0.jar:org/apache/any23/extractor/csv/CSVExtractor.class */
/**
 * Content extractor that turns a CSV document into RDF triples.
 *
 * <p>The first line read from the parser is treated as the header: every column
 * name is mapped to a property URI. Each following line becomes a row resource
 * ({@code <documentURI>row/<index>}) that is linked to the document and carries
 * one statement per non-blank cell. Finally the number of rows and columns is
 * attached to the document URI.
 *
 * <p>The parser and the header URIs are kept in instance fields while
 * {@link #run} executes, so a single instance should presumably not be used by
 * several threads at once.
 */
public class CSVExtractor implements Extractor.ContentExtractor {

    /** Characters that may appear in a decimal or scientific-notation number. */
    private static final String FLOAT_CHARACTERS = "0123456789+-.eE";

    private CSVParser csvParser;
    private URI[] headerURIs;
    private final CSV csv = CSV.getInstance();

    /**
     * Intentionally a no-op: this extractor does not change its behavior based on
     * the flag.
     *
     * @param z ignored
     */
    @Override
    public void setStopAtFirstError(boolean z) {
    }

    /**
     * Reads the CSV content from {@code inputStream} and writes the resulting
     * triples to {@code extractionResult}.
     *
     * @param extractionParameters not used by this extractor
     * @param extractionContext supplies the document URI used as base for all generated URIs
     * @param inputStream the CSV content
     * @param extractionResult sink receiving the generated triples
     * @throws IOException if the underlying parser fails while reading
     * @throws ExtractionException declared by the interface; not thrown directly here
     */
    @Override
    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream inputStream, ExtractionResult extractionResult) throws IOException, ExtractionException {
        URI documentURI = extractionContext.getDocumentURI();
        this.csvParser = CSVReaderBuilder.build(inputStream);

        String[] header = this.csvParser.getLine();
        if (header == null) {
            // No header line at all (the parser signals "no more lines" with null,
            // presumably because the input is empty). Describe an empty table
            // instead of failing with a NullPointerException.
            this.headerURIs = new URI[0];
            addTableMetadataStatements(documentURI, extractionResult, 0, 0);
            return;
        }

        this.headerURIs = processHeader(header, documentURI);
        writeHeaderPropertiesMetadata(header, extractionResult);

        int rowIndex = 0;
        String[] cells;
        while ((cells = this.csvParser.getLine()) != null) {
            URI rowURI = RDFUtils.uri(documentURI.toString(), "row/" + rowIndex);
            extractionResult.writeTriple(rowURI, RDF.TYPE, this.csv.rowType);
            produceRowStatements(rowURI, cells, extractionResult);
            extractionResult.writeTriple(documentURI, this.csv.row, rowURI);
            // NOTE(review): unlike columnPosition, this literal carries no
            // xsd:integer datatype. Left as is because changing it alters the
            // emitted triples; confirm whether that is intended.
            extractionResult.writeTriple(rowURI, this.csv.rowPosition, new LiteralImpl(String.valueOf(rowIndex)));
            rowIndex++;
        }
        addTableMetadataStatements(documentURI, extractionResult, rowIndex, this.headerURIs.length);
    }

    /**
     * Tells whether {@code str} is an integer written as an optional sign followed
     * by one or more ASCII digits.
     *
     * <p>The check is purely lexical, so integers that do not fit into 32 bits are
     * recognized as well instead of falling through to the float check.
     *
     * @param str the (already trimmed) cell text
     * @return {@code true} if the text is an integer literal
     */
    private boolean isInteger(String str) {
        int start = (str.startsWith("+") || str.startsWith("-")) ? 1 : 0;
        if (start == str.length()) {
            // Empty text or a lone sign.
            return false;
        }
        for (int k = start; k < str.length(); k++) {
            char c = str.charAt(k);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether {@code str} is a floating point number in plain decimal or
     * scientific notation, or the literal {@code NaN}.
     *
     * <p>{@link Float#valueOf(String)} alone is too permissive for this purpose: it
     * also accepts Java type suffixes ("3D", "2f"), hexadecimal floats and
     * "Infinity". Such cells are rejected up front so that they are not labelled
     * as floats.
     *
     * @param str the (already trimmed) cell text
     * @return {@code true} if the text is a float literal
     */
    private boolean isFloat(String str) {
        if (!"NaN".equals(str)) {
            for (int k = 0; k < str.length(); k++) {
                if (FLOAT_CHARACTERS.indexOf(str.charAt(k)) < 0) {
                    return false;
                }
            }
        }
        try {
            Float.valueOf(str);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Describes every header property: a label (only when the column name is not
     * itself an absolute URI) and its zero-based column position.
     *
     * @param header the raw header cells; has the same length as {@code headerURIs}
     *               because the latter is built from it by {@link #processHeader}
     * @param extractionResult sink receiving the generated triples
     */
    private void writeHeaderPropertiesMetadata(String[] header, ExtractionResult extractionResult) {
        for (int column = 0; column < this.headerURIs.length; column++) {
            URI property = this.headerURIs[column];
            String columnName = header[column];
            if (!RDFUtils.isAbsoluteURI(columnName)) {
                extractionResult.writeTriple(property, RDFS.LABEL, new LiteralImpl(columnName));
            }
            extractionResult.writeTriple(property, this.csv.columnPosition, new LiteralImpl(String.valueOf(column), XMLSchema.INTEGER));
        }
    }

    /**
     * Maps every header cell to a property URI: absolute URIs are used verbatim,
     * any other name is turned into a URI relative to the document via
     * {@link #normalize}.
     *
     * @param header the header cells
     * @param documentURI base for generated property URIs
     * @return one URI per header cell, in column order
     */
    private URI[] processHeader(String[] header, URI documentURI) {
        URI[] result = new URI[header.length];
        for (int column = 0; column < header.length; column++) {
            String columnName = header[column].trim();
            result[column] = RDFUtils.isAbsoluteURI(columnName)
                    ? new URIImpl(columnName)
                    : normalize(columnName, documentURI);
        }
        return result;
    }

    /**
     * Builds a property URI from a free-text column name: the name is lower-cased,
     * {@code ?} and {@code &} are removed, and the remaining words are appended to
     * the document URI with their first letter capitalized.
     *
     * @param columnName the column name
     * @param documentURI the URI the generated name is appended to
     * @return the generated property URI
     */
    private URI normalize(String columnName, URI documentURI) {
        // Locale.ROOT keeps generated URIs identical regardless of the JVM's
        // default locale (e.g. Turkish lower-casing of 'I').
        String cleaned = columnName.trim().toLowerCase(Locale.ROOT).replace("?", "").replace("&", "");
        StringBuilder sb = new StringBuilder(documentURI.toString());
        // NOTE(review): this Jackson constant looks like a decompiler substitution
        // for a plain " " literal; confirm and replace with the literal.
        StringTokenizer words = new StringTokenizer(cleaned, MinimalPrettyPrinter.DEFAULT_ROOT_VALUE_SEPARATOR);
        while (words.hasMoreTokens()) {
            String word = words.nextToken();
            // StringTokenizer never yields empty tokens, so charAt(0) is safe.
            sb.append(Character.toUpperCase(word.charAt(0))).append(word.substring(1));
        }
        return new URIImpl(sb.toString());
    }

    /**
     * Writes one statement per non-blank cell of a row, using the header URI of the
     * cell's column as predicate. Cells beyond the last header column are ignored.
     *
     * @param rowURI subject of the generated statements
     * @param cells the cells of the row
     * @param extractionResult sink receiving the generated triples
     */
    private void produceRowStatements(URI rowURI, String[] cells, ExtractionResult extractionResult) {
        int columns = Math.min(cells.length, this.headerURIs.length);
        for (int column = 0; column < columns; column++) {
            String cell = cells[column];
            // Whitespace-only cells are skipped like empty ones; otherwise they
            // would be trimmed into an empty literal by getObjectFromCell.
            if (!cell.trim().isEmpty()) {
                extractionResult.writeTriple(rowURI, this.headerURIs[column], getObjectFromCell(cell));
            }
        }
    }

    /**
     * Converts a cell into an RDF value: an absolute URI becomes a resource,
     * anything else a literal typed as integer, float or string (checked in that
     * order).
     *
     * @param cell the raw cell text
     * @return the RDF value for the trimmed cell text
     */
    private Value getObjectFromCell(String cell) {
        String text = cell.trim();
        if (RDFUtils.isAbsoluteURI(text)) {
            return new URIImpl(text);
        }
        URI datatype = XMLSchema.STRING;
        if (isInteger(text)) {
            datatype = XMLSchema.INTEGER;
        } else if (isFloat(text)) {
            datatype = XMLSchema.FLOAT;
        }
        return new LiteralImpl(text, datatype);
    }

    /**
     * Attaches the table dimensions to the document.
     *
     * @param documentURI subject of the statements
     * @param extractionResult sink receiving the generated triples
     * @param numberOfRows number of data rows (header excluded)
     * @param numberOfColumns number of header columns
     */
    private void addTableMetadataStatements(URI documentURI, ExtractionResult extractionResult, int numberOfRows, int numberOfColumns) {
        extractionResult.writeTriple(documentURI, this.csv.numberOfRows, new LiteralImpl(String.valueOf(numberOfRows), XMLSchema.INTEGER));
        extractionResult.writeTriple(documentURI, this.csv.numberOfColumns, new LiteralImpl(String.valueOf(numberOfColumns), XMLSchema.INTEGER));
    }

    /**
     * Returns the description instance provided by {@code CSVExtractorFactory}.
     *
     * @return the extractor description
     */
    @Override
    public ExtractorDescription getDescription() {
        return CSVExtractorFactory.getDescriptionInstance();
    }
}
