package co.cask.gcp.gcs.source;

import co.cask.cdap.api.annotation.Description;
import co.cask.cdap.api.annotation.Macro;
import co.cask.cdap.api.annotation.Name;
import co.cask.cdap.api.annotation.Plugin;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.plugin.EndpointPluginContext;
import co.cask.cdap.etl.api.PipelineConfigurer;
import co.cask.cdap.etl.api.batch.BatchSourceContext;
import co.cask.gcp.common.GCPReferenceSourceConfig;
import co.cask.gcp.gcs.GCSConfigHelper;
import co.cask.hydrator.common.LineageRecorder;
import co.cask.hydrator.format.FileFormat;
import co.cask.hydrator.format.input.PathTrackingInputFormat;
import co.cask.hydrator.format.plugin.AbstractFileSource;
import co.cask.hydrator.format.plugin.FileSourceProperties;
import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import java.lang.reflect.Type;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import javax.ws.rs.Path;

/**
 * Batch source plugin that reads objects from a path in a Google Cloud Storage bucket.
 *
 * <p>The file-reading behaviour is inherited from {@link AbstractFileSource}; this class contributes
 * the GCS-specific Hadoop file system properties, the lineage description, and a {@code getSchema}
 * plugin endpoint.
 */
@Name(GCSSource.NAME)
@Description("Reads objects from a path in a Google Cloud Storage bucket.")
@Plugin(type = "batchsource")
public class GCSSource extends AbstractFileSource<GCSSourceConfig> {
    public static final String NAME = "GCSFile";
    private final GCSSourceConfig config;

    /**
     * User-facing configuration of the {@link GCSSource} plugin.
     *
     * <p>Every optional property is declared as a {@code @Nullable} boxed type. The accessors below
     * never unbox such a field blindly: an explicit {@code null} falls back to the same default the
     * field initializer uses.
     */
    public static class GCSSourceConfig extends GCPReferenceSourceConfig implements FileSourceProperties {
        private static final Gson GSON = new Gson();
        private static final Type MAP_STRING_STRING_TYPE = new TypeToken<Map<String, String>>() { }.getType();

        /** Default maximum split size: 128 MiB (134217728 bytes). */
        private static final long DEFAULT_MAX_SPLIT_SIZE = 128L * 1024 * 1024;

        @Description("The path to read from. For example, gs://<bucket>/path/to/directory/")
        @Macro
        private String path;

        @Description("Map of properties to set on the InputFormat.")
        @Macro
        @Nullable
        private String fileSystemProperties;

        @Description("Output field to place the path of the file that the record was read from. If not specified, the file path will not be included in output records. If specified, the field must exist in the output schema as a string.")
        @Nullable
        private String pathField;

        @Description("Format of the data to read. Supported formats are 'avro', 'blob', 'csv', 'delimited', 'json', 'parquet', 'text', and 'tsv'.")
        @Macro
        @Nullable
        private String format;

        @Description("Output schema. If a Path Field is set, it must be present in the schema as a string.")
        @Macro
        @Nullable
        private String schema;

        @Description("Regular expression that file paths must match in order to be included in the input. The full file path is compared, not just the file name.If no value is given, no file filtering will be done. See https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html for more information about the regular expression syntax.")
        @Macro
        @Nullable
        private String fileRegex;

        @Description("The delimiter to use if the format is 'delimited'. The delimiter will be ignored if the format is anything other than 'delimited'.")
        @Macro
        @Nullable
        private String delimiter;

        @Description("Maximum size of each partition used to read data. Smaller partitions will increase the level of parallelism, but will require more resources and overhead.")
        @Macro
        @Nullable
        private Long maxSplitSize = DEFAULT_MAX_SPLIT_SIZE;

        @Description("Whether to recursively read directories within the input directory. The default is false.")
        @Macro
        @Nullable
        private Boolean recursive = false;

        @Description("Whether to only use the filename instead of the URI of the file path when a path field is given. The default value is false.")
        @Nullable
        private Boolean filenameOnly = false;

        // When true, GCSSource sets PathTrackingInputFormat.COPY_HEADER to "true" in the
        // file system properties. Defaults to false.
        @Nullable
        private Boolean copyHeader = false;

        /**
         * Validates this configuration.
         *
         * <p>Runs the parent validation, checks the path through {@link GCSConfigHelper#getPath}
         * unless the path is supplied as a macro, and parses the file system properties so that
         * malformed JSON is reported here rather than later.
         *
         * @throws IllegalArgumentException if the file system properties are not a valid JSON map;
         *     the parent validation and the path check may throw as well
         */
        @Override
        public void validate() {
            super.validate();
            if (!containsMacro("path")) {
                // Called only for its validation side effect; the returned value is not needed.
                GCSConfigHelper.getPath(this.path);
            }
            // Parsed only to surface invalid JSON; the resulting map is discarded.
            getFileSystemProperties();
        }

        /** Returns the reference name of this source. */
        @Override
        public String getReferenceName() {
            return this.referenceName;
        }

        /** Returns the configured path to read from, exactly as configured. */
        @Override
        public String getPath() {
            return this.path;
        }

        /**
         * Resolves the configured format name to a {@link FileFormat}, passing
         * {@code FileFormat::canRead} as the filter to {@link FileFormat#from}.
         */
        @Override
        public FileFormat getFormat() {
            return FileFormat.from(this.format, FileFormat::canRead);
        }

        /**
         * Compiles the configured file regular expression.
         *
         * @return the compiled pattern, or {@code null} when no regular expression is configured
         * @throws IllegalArgumentException if the regular expression cannot be compiled
         */
        @Override
        @Nullable
        public Pattern getFilePattern() {
            if (this.fileRegex == null) {
                return null;
            }
            try {
                return Pattern.compile(this.fileRegex);
            } catch (RuntimeException e) {
                throw new IllegalArgumentException("Invalid file regular expression: " + e.getMessage(), e);
            }
        }

        /**
         * Returns the maximum split size in bytes, or the 128 MiB default when the property is
         * {@code null}.
         */
        @Override
        public long getMaxSplitSize() {
            return this.maxSplitSize == null ? DEFAULT_MAX_SPLIT_SIZE : this.maxSplitSize;
        }

        /** Always {@code false} for this source. */
        @Override
        public boolean shouldAllowEmptyInput() {
            return false;
        }

        /** Returns whether directories are read recursively; {@code null} is treated as {@code false}. */
        @Override
        public boolean shouldReadRecursively() {
            return Boolean.TRUE.equals(this.recursive);
        }

        /** Returns the name of the output field that receives the file path, or {@code null} if unset. */
        @Override
        @Nullable
        public String getPathField() {
            return this.pathField;
        }

        /**
         * Returns whether only the file name (rather than the full URI) is written to the path
         * field; {@code null} is treated as {@code false}.
         */
        @Override
        public boolean useFilenameAsPath() {
            return Boolean.TRUE.equals(this.filenameOnly);
        }

        /**
         * Parses the configured output schema.
         *
         * @return the parsed schema, or {@code null} when no schema is configured
         * @throws IllegalArgumentException if the schema JSON cannot be parsed
         */
        @Override
        @Nullable
        public Schema getSchema() {
            if (this.schema == null) {
                return null;
            }
            try {
                return Schema.parseJson(this.schema);
            } catch (Exception e) {
                throw new IllegalArgumentException("Unable to parse schema with error: " + e.getMessage(), e);
            }
        }

        /**
         * Parses the file system properties JSON into a map.
         *
         * @return the parsed properties; an empty map when the property is unset, an empty string,
         *     or the JSON literal {@code null}. Never {@code null}.
         * @throws IllegalArgumentException if the value is not a valid JSON object of strings
         */
        Map<String, String> getFileSystemProperties() {
            if (this.fileSystemProperties == null) {
                return Collections.emptyMap();
            }
            Map<String, String> parsed;
            try {
                parsed = GSON.fromJson(this.fileSystemProperties, MAP_STRING_STRING_TYPE);
            } catch (RuntimeException e) {
                throw new IllegalArgumentException("Unable to parse file system properties with error: " + e.getMessage(), e);
            }
            // Gson yields null for an empty document or a JSON null; callers copy the result into
            // a HashMap, which would throw on null.
            if (parsed == null) {
                return Collections.emptyMap();
            }
            return parsed;
        }

        /** Returns whether the copy-header flag is enabled; {@code null} is treated as {@code false}. */
        private boolean shouldCopyHeader() {
            return Boolean.TRUE.equals(this.copyHeader);
        }
    }

    /**
     * Creates the source with the given configuration.
     *
     * @param config plugin configuration
     */
    public GCSSource(GCSSourceConfig config) {
        super(config);
        this.config = config;
    }

    /**
     * Performs the inherited pipeline configuration and then validates the plugin configuration.
     */
    @Override
    public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
        super.configurePipeline(pipelineConfigurer);
        this.config.validate();
    }

    /**
     * Builds the file system properties used to read from GCS.
     *
     * <p>User-supplied properties are copied first; the GCS connector settings are applied
     * afterwards and therefore override any user-supplied value for the same key.
     *
     * @param batchSourceContext runtime context (not used by this implementation)
     * @return a new mutable map of file system properties
     */
    @Override
    protected Map<String, String> getFileSystemProperties(BatchSourceContext batchSourceContext) {
        Map<String, String> properties = new HashMap<>(this.config.getFileSystemProperties());

        String serviceAccountFilePath = this.config.getServiceAccountFilePath();
        if (serviceAccountFilePath != null) {
            properties.put("google.cloud.auth.service.account.json.keyfile", serviceAccountFilePath);
        }

        properties.put("fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem");
        properties.put("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS");
        properties.put(GoogleHadoopFileSystemBase.GCS_PROJECT_ID_KEY, this.config.getProject());
        properties.put(GoogleHadoopFileSystemBase.GCS_SYSTEM_BUCKET_KEY, GCSConfigHelper.getBucket(this.config.getPath()));
        properties.put(GoogleHadoopFileSystemBase.GCS_WORKING_DIRECTORY_KEY, "/");
        properties.put("fs.gs.impl.disable.cache", "true");

        if (this.config.shouldCopyHeader()) {
            properties.put(PathTrackingInputFormat.COPY_HEADER, "true");
        }
        return properties;
    }

    /**
     * Records a read operation named "Read" against the given output fields.
     */
    @Override
    protected void recordLineage(LineageRecorder lineageRecorder, List<String> list) {
        lineageRecorder.recordRead("Read", "Read from Google Cloud Storage.", list);
    }

    /**
     * Plugin endpoint that returns the output schema for the supplied configuration.
     *
     * <p>If the resolved format provides a schema for the configured path field, that schema is
     * returned (presumably formats with a fixed record layout; this depends on
     * {@code FileFormat.getSchema}, which is not visible here). Otherwise the schema parsed from
     * the configuration is returned.
     *
     * @param request configuration submitted with the endpoint call
     * @param endpointPluginContext endpoint context (not used by this implementation)
     * @return the format-provided schema if there is one, else the configured schema, which may be
     *     {@code null}
     */
    @Path("getSchema")
    public Schema getSchema(GCSSourceConfig request, EndpointPluginContext endpointPluginContext) {
        FileFormat fileFormat = request.getFormat();
        if (fileFormat != null) {
            Schema formatSchema = fileFormat.getSchema(request.getPathField());
            if (formatSchema != null) {
                return formatSchema;
            }
        }
        return request.getSchema();
    }
}
