package org.apache.beam.sdk.io.tika;

import com.google.auto.value.AutoValue;
import java.nio.channels.Channels;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.io.Compression;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.tika.AutoValue_TikaIO_Parse;
import org.apache.beam.sdk.io.tika.AutoValue_TikaIO_ParseFiles;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.ToTextContentHandler;

@Experimental(Experimental.Kind.SOURCE_SINK)
/* loaded from: input_file:org/apache/beam/sdk/io/tika/TikaIO.class */
public class TikaIO {

    @AutoValue
    /* loaded from: input_file:org/apache/beam/sdk/io/tika/TikaIO$Parse.class */
    public static abstract class Parse extends PTransform<PBegin, PCollection<ParseResult>> {

        /* JADX INFO: Access modifiers changed from: package-private */
        @AutoValue.Builder
        /* loaded from: input_file:org/apache/beam/sdk/io/tika/TikaIO$Parse$Builder.class */
        public static abstract class Builder {
            abstract Builder setFilepattern(ValueProvider<String> valueProvider);

            abstract Parse build();
        }

        /* JADX INFO: Access modifiers changed from: package-private */
        public abstract ValueProvider<String> getFilepattern();

        abstract Builder toBuilder();

        public Parse filepattern(String str) {
            return filepattern((ValueProvider<String>) ValueProvider.StaticValueProvider.of(str));
        }

        public Parse filepattern(ValueProvider<String> valueProvider) {
            return toBuilder().setFilepattern(valueProvider).build();
        }

        public void populateDisplayData(DisplayData.Builder builder) {
            super.populateDisplayData(builder);
            builder.addIfNotNull(DisplayData.item("filePattern", getFilepattern()).withLabel("File Pattern"));
        }

        public PCollection<ParseResult> expand(PBegin pBegin) {
            return pBegin.apply(FileIO.match().filepattern(getFilepattern())).apply(FileIO.readMatches().withCompression(Compression.UNCOMPRESSED)).apply(TikaIO.parseFiles());
        }
    }

    @AutoValue
    /* loaded from: input_file:org/apache/beam/sdk/io/tika/TikaIO$ParseFiles.class */
    public static abstract class ParseFiles extends PTransform<PCollection<FileIO.ReadableFile>, PCollection<ParseResult>> {

        /* JADX INFO: Access modifiers changed from: package-private */
        @AutoValue.Builder
        /* loaded from: input_file:org/apache/beam/sdk/io/tika/TikaIO$ParseFiles$Builder.class */
        public static abstract class Builder {
            abstract Builder setTikaConfigPath(ValueProvider<String> valueProvider);

            abstract Builder setContentTypeHint(String str);

            abstract Builder setInputMetadata(Metadata metadata);

            abstract ParseFiles build();
        }

        /* JADX INFO: Access modifiers changed from: private */
        /* loaded from: input_file:org/apache/beam/sdk/io/tika/TikaIO$ParseFiles$ParseToStringFn.class */
        public static class ParseToStringFn extends DoFn<FileIO.ReadableFile, ParseResult> {
            private final ParseFiles spec;
            private transient TikaConfig tikaConfig;

            ParseToStringFn(ParseFiles parseFiles) {
                this.spec = parseFiles;
            }

            @DoFn.Setup
            public void setup() throws Exception {
                if (this.spec.getTikaConfigPath() != null) {
                    this.tikaConfig = new TikaConfig(Channels.newInputStream(FileSystems.open(FileSystems.matchSingleFileSpec((String) this.spec.getTikaConfigPath().get()).resourceId())));
                }
            }

            @DoFn.ProcessElement
            public void processElement(DoFn<FileIO.ReadableFile, ParseResult>.ProcessContext processContext) throws Exception {
                ParseResult failure;
                FileIO.ReadableFile readableFile = (FileIO.ReadableFile) processContext.element();
                TikaInputStream tikaInputStream = TikaInputStream.get(Channels.newInputStream(readableFile.open()));
                Throwable th = null;
                try {
                    try {
                        AutoDetectParser autoDetectParser = this.tikaConfig == null ? new AutoDetectParser() : new AutoDetectParser(this.tikaConfig);
                        ParseContext parseContext = new ParseContext();
                        parseContext.set(Parser.class, autoDetectParser);
                        Metadata inputMetadata = this.spec.getInputMetadata() != null ? this.spec.getInputMetadata() : new Metadata();
                        if (this.spec.getContentTypeHint() != null) {
                            inputMetadata.set("Content-Type", this.spec.getContentTypeHint());
                        }
                        String resourceId = readableFile.getMetadata().resourceId().toString();
                        ToTextContentHandler toTextContentHandler = new ToTextContentHandler();
                        try {
                            autoDetectParser.parse(tikaInputStream, toTextContentHandler, inputMetadata, parseContext);
                            failure = ParseResult.success(resourceId, toTextContentHandler.toString(), inputMetadata);
                        } catch (Exception e) {
                            failure = ParseResult.failure(resourceId, toTextContentHandler.toString(), inputMetadata, e);
                        }
                        processContext.output(failure);
                        if (tikaInputStream != null) {
                            if (0 == 0) {
                                tikaInputStream.close();
                                return;
                            }
                            try {
                                tikaInputStream.close();
                            } catch (Throwable th2) {
                                th.addSuppressed(th2);
                            }
                        }
                    } catch (Throwable th3) {
                        th = th3;
                        throw th3;
                    }
                } catch (Throwable th4) {
                    if (tikaInputStream != null) {
                        if (th != null) {
                            try {
                                tikaInputStream.close();
                            } catch (Throwable th5) {
                                th.addSuppressed(th5);
                            }
                        } else {
                            tikaInputStream.close();
                        }
                    }
                    throw th4;
                }
            }
        }

        /* JADX INFO: Access modifiers changed from: package-private */
        public abstract ValueProvider<String> getTikaConfigPath();

        /* JADX INFO: Access modifiers changed from: package-private */
        public abstract String getContentTypeHint();

        /* JADX INFO: Access modifiers changed from: package-private */
        public abstract Metadata getInputMetadata();

        abstract Builder toBuilder();

        public ParseFiles withTikaConfigPath(String str) {
            Preconditions.checkArgument(str != null, "tikaConfigPath can not be null.");
            return withTikaConfigPath((ValueProvider<String>) ValueProvider.StaticValueProvider.of(str));
        }

        public ParseFiles withTikaConfigPath(ValueProvider<String> valueProvider) {
            Preconditions.checkArgument(valueProvider != null, "tikaConfigPath can not be null.");
            return toBuilder().setTikaConfigPath(valueProvider).build();
        }

        public ParseFiles withContentTypeHint(String str) {
            Preconditions.checkNotNull(str, "contentTypeHint can not be null.");
            return toBuilder().setContentTypeHint(str).build();
        }

        public ParseFiles withInputMetadata(Metadata metadata) {
            Metadata inputMetadata = getInputMetadata();
            if (inputMetadata != null) {
                for (String str : metadata.names()) {
                    inputMetadata.set(str, metadata.get(str));
                }
            } else {
                inputMetadata = metadata;
            }
            return toBuilder().setInputMetadata(inputMetadata).build();
        }

        public PCollection<ParseResult> expand(PCollection<FileIO.ReadableFile> pCollection) {
            return pCollection.apply(ParDo.of(new ParseToStringFn(this)));
        }

        public void populateDisplayData(DisplayData.Builder builder) {
            super.populateDisplayData(builder);
            if (getTikaConfigPath() != null) {
                builder.add(DisplayData.item("tikaConfigPath", getTikaConfigPath()).withLabel("TikaConfig Path"));
            }
            Metadata inputMetadata = getInputMetadata();
            if (inputMetadata != null) {
                builder.add(DisplayData.item("inputMetadata", inputMetadata.toString().trim()).withLabel("Input Metadata"));
            }
            builder.addIfNotNull(DisplayData.item("contentTypeHint", getContentTypeHint()).withLabel("Content type hint"));
        }
    }

    /**
     * Creates a {@link Parse} transform from a fresh AutoValue builder with no properties set;
     * configure it with {@link Parse#filepattern(String)}.
     */
    public static Parse parse() {
        Parse.Builder emptyBuilder = new AutoValue_TikaIO_Parse.Builder();
        return emptyBuilder.build();
    }

    /**
     * Creates a {@link ParseFiles} transform from a fresh AutoValue builder with no properties
     * set; the {@code with*} methods on the result add optional configuration.
     */
    public static ParseFiles parseFiles() {
        ParseFiles.Builder emptyBuilder = new AutoValue_TikaIO_ParseFiles.Builder();
        return emptyBuilder.build();
    }
}
