package org.apache.hop.pipeline.transforms.tika;

import com.google.gson.Gson;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.hop.core.exception.HopException;
import org.apache.hop.core.logging.ILogChannel;
import org.apache.hop.core.variables.IVariables;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/apache/hop/pipeline/transforms/tika/TikaOutput.class */
public class TikaOutput {
    // Tika configuration, built in the constructor from the supplied class loader.
    private final TikaConfig tikaConfig;
    // Tika facade created from tikaConfig; the detector and parser below are obtained from it.
    private final org.apache.tika.Tika tika;
    // Log channel; used by parse() to report failures while closing/flushing the streams.
    private final ILogChannel log;
    // Variable space handed to the constructor. Note: the password provider registered in the
    // constructor reads the constructor argument directly rather than this field.
    private final IVariables variables;
    // Detector taken from the Tika facade in the constructor; not read anywhere else in this class.
    private Detector detector;
    // Parser taken from the Tika facade; OutputType.process() delegates the actual parsing to it.
    private Parser parser;
    // Metadata instance created by the most recent parse() call (null until parse() has run).
    private Metadata lastMetadata;
    // Output character encoding passed to the writers/transformers. No code in this class assigns
    // it, so it stays null and the platform-dependent defaults of the helpers apply.
    private String encoding = null;
    // Controls the "indent" output property for XML/HTML output. No code in this class assigns it.
    private boolean prettyPrint = false;
    // Parse context shared by all parse calls; the constructor registers the Parser and a
    // PasswordProvider in it.
    private ParseContext context = new ParseContext();

    /* loaded from: input_file:org/apache/hop/pipeline/transforms/tika/TikaOutput$NoDocumentJSONMetHandler.class */
    /**
     * Metadata handler that writes the collected metadata as a single JSON object.
     *
     * <p>Each metadata name becomes a key. A name with one value maps to that value, a name with
     * several values maps to a JSON array. Values that look like JSON numbers are written bare,
     * empty/null values are written as {@code null}, and everything else is written as an
     * escaped JSON string.
     */
    private class NoDocumentJSONMetHandler extends NoDocumentMetHandler {
        /**
         * Shape of a value that may be emitted as a bare JSON number. Redundant leading zeros are
         * accepted here and stripped before printing. The check is deliberately locale
         * independent: a locale-aware number parser also accepts grouping separators
         * ("1,234"), "NaN", the infinity sign or a dangling decimal point, none of which are
         * valid JSON number tokens.
         */
        private static final String JSON_NUMBER_SHAPE = "-?\\d+(\\.\\d+)?([eE][+-]?\\d+)?";

        private Gson gson;

        /**
         * @param metadata metadata whose entries are written when the document ends
         * @param printWriter destination of the JSON text
         */
        public NoDocumentJSONMetHandler(Metadata metadata, PrintWriter printWriter) {
            super(metadata, printWriter);
            this.gson = new Gson();
        }

        /**
         * Writes all given metadata names and their values as one JSON object.
         *
         * @param strArr metadata names to output, in the order they should appear
         */
        @Override
        public void outputMetadata(String[] strArr) {
            this.writer.print("{ ");
            boolean first = true;
            for (String name : strArr) {
                if (first) {
                    first = false;
                } else {
                    this.writer.println(", ");
                }
                // Gson takes care of quoting and escaping the key.
                this.gson.toJson(name, this.writer);
                this.writer.print(":");
                outputValues(this.metadata.getValues(name));
            }
            this.writer.print(" }");
        }

        /**
         * Writes the values of one metadata entry: a single JSON value when there is exactly one,
         * a JSON array when there are several, and {@code null} when there are none (so that the
         * preceding key never ends up without a value).
         *
         * @param strArr values of a single metadata name
         */
        public void outputValues(String[] strArr) {
            if (strArr.length == 0) {
                this.writer.print("null");
                return;
            }
            boolean multiValued = strArr.length > 1;
            if (multiValued) {
                this.writer.print("[");
            }
            for (int i = 0; i < strArr.length; i++) {
                if (i > 0) {
                    this.writer.print(", ");
                }
                outputValue(strArr[i]);
            }
            if (multiValued) {
                this.writer.print("]");
            }
        }

        /** Writes one value as a JSON null, a bare JSON number or an escaped JSON string. */
        private void outputValue(String value) {
            if (value == null || value.isEmpty()) {
                this.writer.print("null");
            } else if (value.matches(JSON_NUMBER_SHAPE)) {
                // JSON forbids redundant leading zeros ("007", "-007"); keep the sign, drop the zeros.
                this.writer.print(value.replaceFirst("^(-?)0+(\\d)", "$1$2"));
            } else {
                this.gson.toJson(value, this.writer);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/hop/pipeline/transforms/tika/TikaOutput$NoDocumentMetHandler.class */
    /**
     * SAX handler that ignores the document content and, once the document ends, prints the
     * collected metadata as {@code name: value} lines, sorted by name.
     */
    public class NoDocumentMetHandler extends DefaultHandler {
        protected final Metadata metadata;
        protected PrintWriter writer;
        /** Set once {@link #endDocument()} has written the metadata. */
        private boolean metOutput = false;

        /**
         * @param metadata metadata object whose entries are printed at the end of the document
         * @param printWriter destination of the printed metadata
         */
        public NoDocumentMetHandler(Metadata metadata, PrintWriter printWriter) {
            this.metadata = metadata;
            this.writer = printWriter;
        }

        /** Sorts the metadata names, prints them, flushes the writer and records that output happened. */
        @Override
        public void endDocument() {
            final String[] sortedNames = metadata.names();
            Arrays.sort(sortedNames);
            outputMetadata(sortedNames);
            writer.flush();
            metOutput = true;
        }

        /**
         * Prints one line per value of every given metadata name.
         *
         * @param strArr metadata names to print, in output order
         */
        public void outputMetadata(String[] strArr) {
            for (int nameIdx = 0; nameIdx < strArr.length; nameIdx++) {
                final String name = strArr[nameIdx];
                final String[] values = metadata.getValues(name);
                for (int valueIdx = 0; valueIdx < values.length; valueIdx++) {
                    writer.println(name + ": " + values[valueIdx]);
                }
            }
        }

        /** @return {@code true} once the metadata has been written by {@link #endDocument()} */
        public boolean metOutput() {
            return metOutput;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/hop/pipeline/transforms/tika/TikaOutput$OutputType.class */
    /**
     * Base of all output formats. Subclasses supply the SAX content handler that renders the
     * parsed document; this class drives the parser with it.
     */
    public class OutputType {
        private OutputType() {
        }

        /**
         * Parses the input with the enclosing instance's parser and context, rendering through
         * the handler returned by {@link #getContentHandler(OutputStream, Metadata)}.
         *
         * @param inputStream document to parse
         * @param outputStream destination handed to the content handler
         * @param metadata metadata object passed to the parser and the handler
         * @throws Exception whatever the handler creation or the parser throws
         */
        public void process(InputStream inputStream, OutputStream outputStream, Metadata metadata) throws Exception {
            final ContentHandler handler = getContentHandler(outputStream, metadata);
            TikaOutput.this.parser.parse(inputStream, handler, metadata, TikaOutput.this.context);
            if (!(handler instanceof NoDocumentMetHandler)) {
                return;
            }
            // Metadata handlers print on endDocument(); trigger it here if the parser never did.
            final NoDocumentMetHandler metadataHandler = (NoDocumentMetHandler) handler;
            if (!metadataHandler.metOutput()) {
                metadataHandler.endDocument();
            }
        }

        /**
         * Creates the content handler for this output format. The base implementation is not
         * usable and always throws.
         *
         * @throws UnsupportedOperationException always, unless overridden
         */
        protected ContentHandler getContentHandler(OutputStream outputStream, Metadata metadata) throws Exception {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Builds the Tika configuration, facade, detector and parser, and prepares the shared parse
     * context.
     *
     * @param classLoader class loader used to create the Tika configuration
     * @param iLogChannel log channel used for error reporting
     * @param iVariables variable space; its {@code TIKA_PASSWORD} variable is looked up whenever
     *     the registered password provider is asked for a password
     * @throws IOException if the Tika configuration constructor throws it
     * @throws MimeTypeException if the Tika configuration constructor throws it
     */
    public TikaOutput(ClassLoader classLoader, ILogChannel iLogChannel, IVariables iVariables) throws IOException, MimeTypeException {
        this.tikaConfig = new TikaConfig(classLoader);
        this.log = iLogChannel;
        this.variables = iVariables;

        this.tika = new org.apache.tika.Tika(this.tikaConfig);
        this.detector = this.tika.getDetector();
        this.parser = this.tika.getParser();

        this.context.set(Parser.class, this.parser);
        // The variable is resolved lazily, each time a password is requested.
        this.context.set(PasswordProvider.class, ignoredMetadata -> iVariables.getVariable("TIKA_PASSWORD"));
    }

    /**
     * Wraps the output stream in a character writer.
     *
     * @param outputStream stream to write the encoded characters to
     * @param str name of the charset to use, or {@code null} to fall back to UTF-8 on Mac OS X
     *     and to the platform default charset everywhere else
     * @return writer encoding into {@code outputStream}
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    private static Writer getOutputWriter(OutputStream outputStream, String str) throws UnsupportedEncodingException {
        if (str != null) {
            return new OutputStreamWriter(outputStream, str);
        }
        final String osName = System.getProperty("os.name").toLowerCase();
        if (osName.startsWith("mac os x")) {
            return new OutputStreamWriter(outputStream, "UTF-8");
        }
        return new OutputStreamWriter(outputStream);
    }

    /**
     * Creates a SAX transformer handler that serializes the SAX events it receives to the given
     * stream.
     *
     * @param outputStream stream receiving the serialized output
     * @param str value for the transformer's "method" output property (e.g. "xml" or "html")
     * @param str2 value for the "encoding" output property, or {@code null} to leave it unset
     * @param z whether the "indent" output property is set to "yes" (otherwise "no")
     * @return the configured handler
     * @throws TransformerConfigurationException if the handler cannot be created
     */
    private static TransformerHandler getTransformerHandler(OutputStream outputStream, String str, String str2, boolean z) throws TransformerConfigurationException {
        final SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        // Empty protocol lists: no external DTDs or stylesheets may be fetched.
        factory.setAttribute("http://javax.xml.XMLConstants/property/accessExternalDTD", "");
        factory.setAttribute("http://javax.xml.XMLConstants/property/accessExternalStylesheet", "");

        final TransformerHandler handler = factory.newTransformerHandler();
        final String indent;
        if (z) {
            indent = "yes";
        } else {
            indent = "no";
        }
        handler.getTransformer().setOutputProperty("method", str);
        handler.getTransformer().setOutputProperty("indent", indent);
        if (str2 != null) {
            handler.getTransformer().setOutputProperty("encoding", str2);
        }

        handler.setResult(new StreamResult(outputStream));
        return handler;
    }

    /**
     * @return an output type that serializes the parsed document as XML, using this instance's
     *     encoding and pretty-print settings
     */
    public final OutputType getXml() {
        return new OutputType() {
            @Override
            protected ContentHandler getContentHandler(OutputStream out, Metadata meta) throws Exception {
                final String charset = encoding;
                final boolean indent = prettyPrint;
                return getTransformerHandler(out, "xml", charset, indent);
            }
        };
    }

    /**
     * @return an output type that serializes the parsed document as HTML, with the transformer
     *     handler wrapped in an {@link ExpandedTitleContentHandler}
     */
    public final OutputType getHTML() {
        return new OutputType() {
            @Override
            protected ContentHandler getContentHandler(OutputStream out, Metadata meta) throws Exception {
                final TransformerHandler htmlSerializer = getTransformerHandler(out, "html", encoding, prettyPrint);
                return new ExpandedTitleContentHandler(htmlSerializer);
            }
        };
    }

    /**
     * @return an output type that writes the document body through a {@link BodyContentHandler}
     *     into a writer using this instance's encoding
     */
    public final OutputType getTEXT() {
        return new OutputType() {
            @Override
            protected ContentHandler getContentHandler(OutputStream out, Metadata meta) throws Exception {
                final Writer textSink = getOutputWriter(out, encoding);
                return new BodyContentHandler(textSink);
            }
        };
    }

    /**
     * @return an output type whose handler is a plain {@link DefaultHandler}; the output stream
     *     and metadata arguments are not used by it
     */
    public final OutputType getNO_OUTPUT() {
        return new OutputType() {
            @Override
            protected ContentHandler getContentHandler(OutputStream unusedOut, Metadata unusedMeta) {
                final ContentHandler discarding = new DefaultHandler();
                return discarding;
            }
        };
    }

    /**
     * @return an output type that writes the document body through a {@link BodyContentHandler}
     *     into a writer using this instance's encoding
     */
    public final OutputType getTEXT_MAIN() {
        // NOTE(review): this builds exactly the same handler as getTEXT(); if "main content"
        // extraction is meant to differ from plain text, that is not implemented here - confirm.
        return new OutputType() {
            @Override
            protected ContentHandler getContentHandler(OutputStream out, Metadata meta) throws Exception {
                final Writer mainTextSink = getOutputWriter(out, encoding);
                return new BodyContentHandler(mainTextSink);
            }
        };
    }

    /**
     * @return an output type that prints only the metadata, as {@code name: value} lines, via a
     *     {@link NoDocumentMetHandler}
     */
    public final OutputType getMETADATA() {
        return new OutputType() {
            @Override
            protected ContentHandler getContentHandler(OutputStream out, Metadata meta) throws Exception {
                final PrintWriter printer = new PrintWriter(getOutputWriter(out, encoding));
                return new NoDocumentMetHandler(meta, printer);
            }
        };
    }

    /**
     * @return an output type that prints only the metadata, as a JSON object, via a
     *     {@link NoDocumentJSONMetHandler}
     */
    public final OutputType getJSON() {
        return new OutputType() {
            @Override
            protected ContentHandler getContentHandler(OutputStream out, Metadata meta) throws Exception {
                final PrintWriter printer = new PrintWriter(getOutputWriter(out, encoding));
                return new NoDocumentJSONMetHandler(meta, printer);
            }
        };
    }

    /**
     * Builds the mapping from output type display names to freshly created output types.
     *
     * <p>Only "Plain text", "Main content", "Xml", "HTML" and "JSON" are registered; the
     * metadata-only and no-output types are not part of this map.
     *
     * @return a new, mutable map on every call
     */
    public Map<String, OutputType> getFileOutputTypeCodes() {
        // Typed map instead of a raw HashMap, so the return needs no unchecked conversion.
        Map<String, OutputType> typesByName = new HashMap<>();
        typesByName.put("Plain text", getTEXT());
        typesByName.put("Main content", getTEXT_MAIN());
        typesByName.put("Xml", getXml());
        typesByName.put("HTML", getHTML());
        typesByName.put("JSON", getJSON());
        return typesByName;
    }

    /**
     * Looks up an output type by its display name.
     *
     * @param str display name as registered in {@link #getFileOutputTypeCodes()}
     * @return the matching output type, or {@code null} when the name is not registered
     */
    private OutputType getTypeByName(String str) {
        final Map<String, OutputType> typesByName = getFileOutputTypeCodes();
        return typesByName.get(str);
    }

    /**
     * Parses the input stream and writes the result to the output stream in the requested format.
     *
     * <p>The input is wrapped in a {@link TikaInputStream} which is closed when this method
     * returns; the output stream is flushed but not closed. Failures while closing or flushing
     * are logged and do not replace the outcome of the parse itself.
     *
     * @param inputStream document to parse
     * @param str display name of the output type, as registered in
     *     {@link #getFileOutputTypeCodes()}
     * @param outputStream destination of the rendered output
     * @throws Exception a {@link HopException} when the output type name is unknown or when
     *     processing fails (the original failure is kept as the cause)
     */
    public void parse(InputStream inputStream, String str, OutputStream outputStream) throws Exception {
        TikaInputStream tikaInputStream = TikaInputStream.get(inputStream);
        try {
            OutputType outputType = getTypeByName(str);
            if (outputType == null) {
                // Fail with a clear message instead of a NullPointerException further down.
                throw new HopException("Unknown output type : " + str);
            }
            try {
                this.lastMetadata = new Metadata();
                outputType.process(tikaInputStream, outputStream, this.lastMetadata);
            } catch (Exception e) {
                // Report the requested type name; the handler object's identity says nothing useful.
                throw new HopException("Error processing output type : " + str, e);
            }
        } finally {
            // Close and flush independently so a failing close cannot skip the flush.
            try {
                tikaInputStream.close();
            } catch (Exception e) {
                this.log.logError("Error closing file", e);
            }
            try {
                outputStream.flush();
            } catch (Exception e) {
                this.log.logError("Error flushing output", e);
            }
        }
    }

    /**
     * @return the metadata object created by the most recent {@code parse} call or set through
     *     {@link #setLastMetadata(Metadata)}; {@code null} if neither has happened yet
     */
    public Metadata getLastMetadata() {
        return lastMetadata;
    }

    /**
     * Replaces the stored metadata object.
     *
     * @param metadata metadata to return from {@link #getLastMetadata()} from now on
     */
    public void setLastMetadata(final Metadata metadata) {
        this.lastMetadata = metadata;
    }
}
