package org.apache.any23.cli;

import com.beust.jcommander.IStringConverter;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.converters.FileConverter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import org.apache.any23.Any23;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.filter.IgnoreAccidentalRDFa;
import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.writer.BenchmarkTripleHandler;
import org.apache.any23.writer.LoggingTripleHandler;
import org.apache.any23.writer.ReportingTripleHandler;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.any23.writer.WriterFactoryRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Parameters(commandNames = {"rover"}, commandDescription = "Any23 Command Line Tool.")
/* loaded from: input_file:org/apache/any23/cli/Rover.class */
public class Rover implements Tool {
    private static final int DEFAULT_FORMAT_INDEX = 0;

    @Parameter(names = {"-o", "--output"}, description = "Specify Output file (defaults to standard output)", converter = PrintStreamConverter.class)
    private PrintStream outputStream = System.out;

    @Parameter(description = "input IRIs {<url>|<file>}+", converter = ArgumentToIRIConverter.class)
    protected List<String> inputIRIs = new LinkedList();

    @Parameter(names = {"-e", "--extractors"}, description = "a comma-separated list of extractors, e.g. rdf-xml,rdf-turtle")
    private List<String> extractors = new LinkedList();

    @Parameter(names = {"-f", "--format"}, description = "the output format")
    private String format = FORMATS.get(DEFAULT_FORMAT_INDEX);

    @Parameter(names = {"-l", "--log"}, description = "Produce log within a file.", converter = FileConverter.class)
    private File logFile = null;

    @Parameter(names = {"-s", "--stats"}, description = "Print out extraction statistics.")
    private boolean statistics;

    @Parameter(names = {"-t", "--notrivial"}, description = "Filter trivial statements (e.g. CSS related ones).")
    private boolean noTrivial;

    @Parameter(names = {"-p", "--pedantic"}, description = "Validate and fixes HTML content detecting commons issues.")
    private boolean pedantic;

    @Parameter(names = {"-n", "--nesting"}, description = "Disable production of nesting triples.")
    private boolean nestingDisabled;

    @Parameter(names = {"-d", "--defaultns"}, description = "Override the default namespace used to produce statements.")
    private String defaultns;
    private TripleHandler tripleHandler;
    private ReportingTripleHandler reportingTripleHandler;
    private BenchmarkTripleHandler benchmarkTripleHandler;
    private Any23 any23;
    private ExtractionParameters extractionParameters;
    private static final List<String> FORMATS = WriterFactoryRegistry.getInstance().getIdentifiers();
    private static final Logger logger = LoggerFactory.getLogger(Rover.class);

    /* loaded from: input_file:org/apache/any23/cli/Rover$ArgumentToIRIConverter.class */
    public static final class ArgumentToIRIConverter implements IStringConverter<String> {
        /* renamed from: convert, reason: merged with bridge method [inline-methods] */
        public String m4convert(String str) {
            String trim = str.trim();
            if (trim.toLowerCase().startsWith("http:") || trim.toLowerCase().startsWith("https:")) {
                try {
                    return new URL(trim).toString();
                } catch (MalformedURLException e) {
                    throw new ParameterException(String.format("Invalid IRI: '%s': %s", trim, e.getMessage()));
                }
            }
            File file = new File(trim);
            if (!file.exists()) {
                throw new ParameterException(String.format("No such file: [%s]", file.getAbsolutePath()));
            }
            if (file.isDirectory()) {
                throw new ParameterException(String.format("Found a directory: [%s]", file.getAbsolutePath()));
            }
            return file.toURI().toString();
        }
    }

    /* loaded from: input_file:org/apache/any23/cli/Rover$PrintStreamConverter.class */
    public static final class PrintStreamConverter implements IStringConverter<PrintStream> {
        /* renamed from: convert, reason: merged with bridge method [inline-methods] */
        public PrintStream m5convert(String str) {
            File file = new File(str);
            try {
                return new PrintStream(file);
            } catch (FileNotFoundException e) {
                throw new ParameterException(String.format("Cannot open file '%s': %s", file, e.getMessage()));
            }
        }
    }

    protected void configure() {
        try {
            this.tripleHandler = WriterFactoryRegistry.getInstance().getWriterInstanceByIdentifier(this.format, this.outputStream);
            if (this.logFile != null) {
                try {
                    this.tripleHandler = new LoggingTripleHandler(this.tripleHandler, new PrintWriter(this.logFile));
                } catch (FileNotFoundException e) {
                    throw new IllegalArgumentException(String.format("Can not write to log file [%s]", this.logFile), e);
                }
            }
            if (this.statistics) {
                this.benchmarkTripleHandler = new BenchmarkTripleHandler(this.tripleHandler);
                this.tripleHandler = this.benchmarkTripleHandler;
            }
            if (this.noTrivial) {
                this.tripleHandler = new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(this.tripleHandler), true);
            }
            this.reportingTripleHandler = new ReportingTripleHandler(this.tripleHandler);
            DefaultConfiguration singleton = DefaultConfiguration.singleton();
            this.extractionParameters = this.pedantic ? new ExtractionParameters(singleton, ExtractionParameters.ValidationMode.ValidateAndFix, this.nestingDisabled) : new ExtractionParameters(singleton, ExtractionParameters.ValidationMode.None, this.nestingDisabled);
            if (this.defaultns != null) {
                this.extractionParameters.setProperty("any23.extraction.context.iri", this.defaultns);
            }
            this.any23 = this.extractors.isEmpty() ? new Any23() : new Any23((String[]) this.extractors.toArray(new String[this.extractors.size()]));
            this.any23.setHTTPUserAgent(Any23.DEFAULT_HTTP_CLIENT_USER_AGENT + "/" + Any23.VERSION);
        } catch (Exception e2) {
            throw new NullPointerException(String.format("Invalid output format '%s', admitted values: %s", this.format, FORMATS));
        }
    }

    protected String printReports() {
        StringBuilder sb = new StringBuilder();
        if (this.benchmarkTripleHandler != null) {
            sb.append(this.benchmarkTripleHandler.report()).append('\n');
        }
        if (this.reportingTripleHandler != null) {
            sb.append(this.reportingTripleHandler.printReport()).append('\n');
        }
        return sb.toString();
    }

    protected void performExtraction(DocumentSource documentSource) throws Exception {
        if (!this.any23.extract(this.extractionParameters, documentSource, this.reportingTripleHandler).hasMatchingExtractors()) {
            throw new IllegalStateException(String.format("No suitable extractors found for source %s", documentSource.getDocumentIRI()));
        }
    }

    protected void close() {
        if (this.tripleHandler != null) {
            try {
                this.tripleHandler.close();
            } catch (TripleHandlerException e) {
                throw new RuntimeException("Error while closing TripleHandler", e);
            }
        }
        if (this.outputStream == null || this.outputStream == System.out) {
            return;
        }
        this.outputStream.close();
    }

    public void run() throws Exception {
        if (this.inputIRIs.isEmpty()) {
            throw new IllegalArgumentException("Expected at least 1 argument.");
        }
        configure();
        try {
            long currentTimeMillis = System.currentTimeMillis();
            Iterator<String> it = this.inputIRIs.iterator();
            while (it.hasNext()) {
                performExtraction(this.any23.createDocumentSource(it.next()));
            }
            long currentTimeMillis2 = System.currentTimeMillis() - currentTimeMillis;
            if (this.benchmarkTripleHandler != null) {
                System.err.println(this.benchmarkTripleHandler.report());
            }
            logger.info("Extractors used: " + this.reportingTripleHandler.getExtractorNames());
            logger.info(this.reportingTripleHandler.getTotalTriples() + " triples, " + currentTimeMillis2 + "ms");
            close();
        } catch (Throwable th) {
            close();
            throw th;
        }
    }
}
