package org.apache.nifi.processors.kite;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.ReadsAttribute;
import org.apache.nifi.annotation.behavior.ReadsAttributes;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.Validator;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.kitesdk.data.spi.JsonUtil;
import org.kitesdk.data.spi.filesystem.CSVProperties;
import org.kitesdk.data.spi.filesystem.CSVUtil;

@CapabilityDescription("Examines the contents of the incoming FlowFile to infer an Avro schema. The processor will use the Kite SDK to make an attempt to automatically generate an Avro schema from the incoming content. When inferring the schema from JSON data the key names will be used in the resulting Avro schema definition. When inferring from CSV data a \"header definition\" must be present either as the first line of the incoming data or the \"header definition\" must be explicitly set in the property \"CSV Header Definition\". A \"header definition\" is simply a single comma separated line defining the names of each column. The \"header definition\" is required in order to determine the names that should be given to each field in the resulting Avro definition. When inferring data types the higher order data type is always used if there is ambiguity. For example when examining numerical values the type may be set to \"long\" instead of \"integer\" since a long can safely hold the value of any \"integer\". Only CSV and JSON content is currently supported for automatically inferring an Avro schema. The type of content present in the incoming FlowFile is set by using the property \"Input Content Type\". The property can either be explicitly set to CSV, JSON, or \"use mime.type value\" which will examine the value of the mime.type attribute on the incoming FlowFile to determine the type of content present.")
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"kite", "avro", "infer", "schema", InferAvroSchema.CSV_CONTENT, InferAvroSchema.JSON_CONTENT})
@WritesAttributes({@WritesAttribute(attribute = InferAvroSchema.AVRO_SCHEMA_ATTRIBUTE_NAME, description = "If configured by \"Schema output destination\" to write to an attribute this will hold the resulting Avro schema from inferring the incoming FlowFile content.")})
@ReadsAttributes({@ReadsAttribute(attribute = "mime.type", description = "If configured by property \"Input Content Type\" will use this value to determine what sort of content should be inferred from the incoming FlowFile content.")})
/* loaded from: input_file:org/apache/nifi/processors/kite/InferAvroSchema.class */
public class InferAvroSchema extends AbstractKiteProcessor {
    /** FlowFile attribute that receives the inferred schema when the schema is written to an attribute. */
    public static final String AVRO_SCHEMA_ATTRIBUTE_NAME = "inferred.avro.schema";
    /** {@code mime.type} value that selects JSON inference when the content type is taken from the FlowFile. */
    public static final String JSON_MIME_TYPE = "application/json";
    /** {@code mime.type} value that selects CSV inference when the content type is taken from the FlowFile. */
    public static final String CSV_MIME_TYPE = "text/csv";
    /** {@code mime.type} set on the schema FlowFile when the schema is written to FlowFile content. */
    public static final String AVRO_MIME_TYPE = "application/avro-binary";
    /** Suffix appended to the incoming FlowFile's filename to name the schema FlowFile. */
    public static final String AVRO_FILE_EXTENSION = ".avro";
    /** Supported property descriptors; assigned once in {@code init} as an unmodifiable list. */
    private List<PropertyDescriptor> properties;
    /** Relationships exposed by this processor; assigned once in {@code init} as an unmodifiable set. */
    private Set<Relationship> relationships;
    /**
     * Validator for properties whose value must be a single character other than NUL.
     * The raw input is first passed through {@link #unescapeString(String)}, so escaped
     * forms are judged by the text they unescape to. A value that is not a single character
     * is still accepted when the validation context reports Expression Language in it.
     */
    private static final Validator CHAR_VALIDATOR = new Validator() {
        @Override
        public ValidationResult validate(String subject, String input, ValidationContext context) {
            final String unescaped = InferAvroSchema.unescapeString(input);
            final boolean singleNonNulChar = unescaped.length() == 1 && unescaped.charAt(0) != 0;
            // Short-circuit: the context is only consulted when the plain single-character check fails.
            final boolean acceptable = singleNonNulChar || context.isExpressionLanguagePresent(unescaped);

            final ValidationResult.Builder result = new ValidationResult.Builder();
            result.subject(subject);
            result.input(unescaped);
            result.explanation("Only non-null single characters are supported");
            result.valid(acceptable);
            return result.build();
        }
    };
    /**
     * Pattern used to validate the "Avro Record Name" property.
     * NOTE(review): the trailing {@code [^.]} matches ANY character other than a dot, not only
     * name characters, and together with the leading {@code +} group it requires at least two
     * characters when the whole value must match. Confirm whether that is intended.
     */
    public static final Pattern AVRO_RECORD_NAME_PATTERN = Pattern.compile("[A-Za-z_]+[A-Za-z0-9_.]*[^.]");
    /** Allowable value of {@link #SCHEMA_DESTINATION}: write the schema to a FlowFile attribute. */
    public static final String DESTINATION_ATTRIBUTE = "flowfile-attribute";
    /** Allowable value of {@link #SCHEMA_DESTINATION}: write the schema as FlowFile content. */
    public static final String DESTINATION_CONTENT = "flowfile-content";
    /** Selects whether the inferred schema goes to an attribute or to content; defaults to content. */
    public static final PropertyDescriptor SCHEMA_DESTINATION = new PropertyDescriptor.Builder().name("Schema Output Destination").description("Control if Avro schema is written as a new flowfile attribute 'inferred.avro.schema' or written in the flowfile content. Writing to flowfile content will overwrite any existing flowfile content.").required(true).allowableValues(new String[]{DESTINATION_ATTRIBUTE, DESTINATION_CONTENT}).defaultValue(DESTINATION_CONTENT).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    /** Allowable value of {@link #INPUT_CONTENT_TYPE}: decide between JSON and CSV from the mime.type attribute. */
    public static final String USE_MIME_TYPE = "use mime.type value";
    /** Allowable value of {@link #INPUT_CONTENT_TYPE}: treat the content as JSON. */
    public static final String JSON_CONTENT = "json";
    /** Allowable value of {@link #INPUT_CONTENT_TYPE}: treat the content as CSV. */
    public static final String CSV_CONTENT = "csv";
    /** Declares how the incoming content should be interpreted; defaults to {@link #USE_MIME_TYPE}. */
    public static final PropertyDescriptor INPUT_CONTENT_TYPE = new PropertyDescriptor.Builder().name("Input Content Type").description("Content Type of data present in the incoming FlowFile's content. Only \"json\" or \"csv\" are supported. If this value is set to \"use mime.type value\" the incoming Flowfile's attribute \"" + CoreAttributes.MIME_TYPE + "\" will be used to determine the Content Type.").allowableValues(new String[]{USE_MIME_TYPE, JSON_CONTENT, CSV_CONTENT}).defaultValue(USE_MIME_TYPE).required(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    /** CSV only: when "true" the header is read from the first line of the content; defaults to "true". */
    public static final PropertyDescriptor GET_CSV_HEADER_DEFINITION_FROM_INPUT = new PropertyDescriptor.Builder().name("Get CSV Header Definition From Data").description("This property only applies to CSV content type. If \"true\" the processor will attempt to read the CSV header definition from the first line of the input data.").required(true).allowableValues(new String[]{"true", "false"}).defaultValue("true").addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();
    /** CSV only: optional explicit header line; supports Expression Language against FlowFile attributes. */
    public static final PropertyDescriptor CSV_HEADER_DEFINITION = new PropertyDescriptor.Builder().name("CSV Header Definition").description("This property only applies to CSV content type. Comma separated string defining the column names expected in the CSV data. EX: \"fname,lname,zip,address\". The elements present in this string should be in the same order as the underlying data. Setting this property will cause the value of \"" + GET_CSV_HEADER_DEFINITION_FROM_INPUT.getName() + "\" to be ignored instead using this value.").required(false).expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).defaultValue((String) null).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    /** CSV only: number of lines handed to the CSV reader as lines to skip; defaults to 0. */
    public static final PropertyDescriptor HEADER_LINE_SKIP_COUNT = new PropertyDescriptor.Builder().name("CSV Header Line Skip Count").description("This property only applies to CSV content type. Specifies the number of lines that should be skipped when reading the CSV data. Setting this value to 0 is equivalent to saying \"the entire contents of the file should be read\". If the property \"" + GET_CSV_HEADER_DEFINITION_FROM_INPUT.getName() + "\" is set then the first line of the CSV  file will be read in and treated as the CSV header definition. Since this will remove the header line from the data care should be taken to make sure the value of \"CSV header Line Skip Count\" is set to 0 to ensure no data is skipped.").required(true).defaultValue("0").expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR).build();
    /** CSV delimiter; checked by the single-character validator; defaults to a comma. */
    public static final PropertyDescriptor DELIMITER = new PropertyDescriptor.Builder().name("CSV delimiter").description("Delimiter character for CSV records").expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(CHAR_VALIDATOR).defaultValue(",").build();
    /** CSV only: escape sequence; defaults to a single backslash. */
    public static final PropertyDescriptor ESCAPE_STRING = new PropertyDescriptor.Builder().name("CSV Escape String").description("This property only applies to CSV content type. String that represents an escape sequence in the CSV FlowFile content data.").required(true).defaultValue("\\").expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    /** CSV only: quote string; defaults to a single quote character. */
    public static final PropertyDescriptor QUOTE_STRING = new PropertyDescriptor.Builder().name("CSV Quote String").description("This property only applies to CSV content type. String that represents a literal quote character in the CSV FlowFile content data.").required(true).defaultValue("'").expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    /** Name placed in the generated record schema; validated against {@link #AVRO_RECORD_NAME_PATTERN}. */
    public static final PropertyDescriptor RECORD_NAME = new PropertyDescriptor.Builder().name("Avro Record Name").description("Value to be placed in the Avro record schema \"name\" field. The value must adhere to the Avro naming rules for fullname. If Expression Language is present then the evaluated value must adhere to the Avro naming rules.").required(true).expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.createRegexMatchingValidator(AVRO_RECORD_NAME_PATTERN)).build();
    /** Character encoding passed to the CSV reader; defaults to UTF-8. */
    public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder().name("Charset").description("Character encoding of CSV data.").required(true).defaultValue("UTF-8").expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.CHARACTER_SET_VALIDATOR).build();
    /** Whether the schema text is produced in its formatted ("pretty") form; defaults to "true". */
    public static final PropertyDescriptor PRETTY_AVRO_OUTPUT = new PropertyDescriptor.Builder().name("Pretty Avro Output").description("If true the Avro output will be formatted.").required(true).defaultValue("true").allowableValues(new String[]{"true", "false"}).addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();
    /** JSON only: number of records passed to the JSON schema inference; defaults to 10. */
    public static final PropertyDescriptor NUM_RECORDS_TO_ANALYZE = new PropertyDescriptor.Builder().name("Number Of Records To Analyze").description("This property only applies to JSON content type. The number of JSON records that should be examined to determine the Avro schema. The higher the value the better chance kite has of detecting the appropriate type. However the default value of 10 is almost always enough.").required(true).defaultValue("10").expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES).addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR).build();
    /** Receives the FlowFile that carries the inferred schema. */
    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").description("Successfully created Avro schema from data.").build();
    /** Receives the untouched incoming FlowFile after a schema was produced. */
    public static final Relationship REL_ORIGINAL = new Relationship.Builder().name("original").description("Original incoming FlowFile data").build();
    /** Receives the incoming FlowFile when inference throws. */
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description("Failed to create Avro schema from data.").build();
    /** Receives the incoming FlowFile when no schema text was produced for it. */
    public static final Relationship REL_UNSUPPORTED_CONTENT = new Relationship.Builder().name("unsupported content").description("The content found in the flowfile content is not of the required format.").build();

    /**
     * Builds the property descriptor list and the relationship set exposed by this processor.
     * Both collections are wrapped as unmodifiable before being stored.
     *
     * @param processorInitializationContext initialization context supplied by the framework; not read here
     */
    protected void init(ProcessorInitializationContext processorInitializationContext) {
        // Typed collections instead of raw ArrayList/HashSet, so no unchecked conversions are needed.
        final List<PropertyDescriptor> descriptors = new ArrayList<>();
        descriptors.add(SCHEMA_DESTINATION);
        descriptors.add(INPUT_CONTENT_TYPE);
        descriptors.add(CSV_HEADER_DEFINITION);
        descriptors.add(GET_CSV_HEADER_DEFINITION_FROM_INPUT);
        descriptors.add(HEADER_LINE_SKIP_COUNT);
        descriptors.add(DELIMITER);
        descriptors.add(ESCAPE_STRING);
        descriptors.add(QUOTE_STRING);
        descriptors.add(PRETTY_AVRO_OUTPUT);
        descriptors.add(RECORD_NAME);
        descriptors.add(NUM_RECORDS_TO_ANALYZE);
        descriptors.add(CHARSET);
        this.properties = Collections.unmodifiableList(descriptors);

        final Set<Relationship> rels = new HashSet<>();
        rels.add(REL_SUCCESS);
        rels.add(REL_FAILURE);
        rels.add(REL_ORIGINAL);
        rels.add(REL_UNSUPPORTED_CONTENT);
        this.relationships = Collections.unmodifiableSet(rels);
    }

    /**
     * Returns the property descriptors assembled in {@code init}.
     *
     * @return the unmodifiable descriptor list stored on this instance
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return properties;
    }

    /**
     * Returns the relationships assembled in {@code init}.
     *
     * @return the unmodifiable relationship set stored on this instance
     */
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /**
     * Infers an Avro schema for one incoming FlowFile and routes the result.
     *
     * <p>The inference strategy is chosen by the "Input Content Type" property. When schema text is
     * produced, a second FlowFile carrying it (as an attribute on a clone, or as new content) is sent
     * to {@code success} and the incoming FlowFile goes to {@code original}. When no schema text is
     * produced, the incoming FlowFile goes to {@code unsupported content}. Any exception sends the
     * incoming FlowFile to {@code failure}.
     *
     * @param processContext provides the configured property values
     * @param processSession session used to read, create and transfer FlowFiles
     * @throws ProcessException declared by the processor contract
     */
    public void onTrigger(ProcessContext processContext, ProcessSession processSession) throws ProcessException {
        final FlowFile original = processSession.get();
        if (original == null) {
            return;
        }

        // Tracked outside the try block so that a half-built schema FlowFile can be discarded on failure.
        FlowFile schemaFlowFile = null;
        try {
            final String avroSchema;
            switch (processContext.getProperty(INPUT_CONTENT_TYPE).getValue()) {
                case USE_MIME_TYPE:
                    avroSchema = inferAvroSchemaFromMimeType(original, processContext, processSession);
                    break;
                case JSON_CONTENT:
                    avroSchema = inferAvroSchemaFromJSON(original, processContext, processSession);
                    break;
                case CSV_CONTENT:
                    avroSchema = inferAvroSchemaFromCSV(original, processContext, processSession);
                    break;
                default:
                    // Not reachable with the declared allowable values; treated like "no schema produced".
                    avroSchema = null;
                    break;
            }

            if (StringUtils.isEmpty(avroSchema)) {
                processSession.transfer(original, REL_UNSUPPORTED_CONTENT);
                return;
            }

            final String destination = processContext.getProperty(SCHEMA_DESTINATION).getValue();
            switch (destination) {
                case DESTINATION_ATTRIBUTE:
                    schemaFlowFile = processSession.clone(original);
                    schemaFlowFile = processSession.putAttribute(schemaFlowFile, AVRO_SCHEMA_ATTRIBUTE_NAME, avroSchema);
                    break;
                case DESTINATION_CONTENT:
                    schemaFlowFile = processSession.create();
                    schemaFlowFile = processSession.write(schemaFlowFile, new OutputStreamCallback() {
                        @Override
                        public void process(OutputStream outputStream) throws IOException {
                            // Explicit UTF-8 so the emitted schema does not depend on the JVM default charset.
                            outputStream.write(avroSchema.getBytes(StandardCharsets.UTF_8));
                        }
                    });
                    schemaFlowFile = processSession.putAttribute(schemaFlowFile, CoreAttributes.MIME_TYPE.key(), AVRO_MIME_TYPE);
                    break;
                default:
                    // Caught below and routed to failure, instead of failing later on a null FlowFile.
                    throw new ProcessException("Unsupported schema destination: " + destination);
            }

            final String schemaFilename = original.getAttribute(CoreAttributes.FILENAME.key()) + AVRO_FILE_EXTENSION;
            schemaFlowFile = processSession.putAttribute(schemaFlowFile, CoreAttributes.FILENAME.key(), schemaFilename);
            processSession.transfer(schemaFlowFile, REL_SUCCESS);
            processSession.transfer(original, REL_ORIGINAL);
        } catch (Exception e) {
            getLogger().error("Failed to infer Avro schema for {} due to {}", new Object[]{original, e});
            if (schemaFlowFile != null) {
                // Drop the partially prepared schema FlowFile so the session is not left holding
                // a FlowFile that was neither transferred nor removed.
                processSession.remove(schemaFlowFile);
            }
            processSession.transfer(original, REL_FAILURE);
        }
    }

    /**
     * Infers an Avro schema from CSV content using the configured CSV properties.
     *
     * <p>The header line comes from the first line of the content when
     * "Get CSV Header Definition From Data" is true, otherwise from the "CSV Header Definition" property.
     * NOTE(review): the description of "CSV Header Definition" says that setting it overrides reading
     * the header from data, but this method only consults it when reading from data is disabled.
     * Behaviour is left as-is; confirm which of the two is intended.
     *
     * @param flowFile       FlowFile whose content is analysed and whose attributes feed Expression Language
     * @param processContext provides the configured property values
     * @param processSession session used to read the FlowFile content
     * @return the schema text, formatted according to "Pretty Avro Output"
     */
    private String inferAvroSchemaFromCSV(final FlowFile flowFile, final ProcessContext processContext, ProcessSession processSession) {
        final String charsetName = processContext.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue();
        // equals() rather than == so the check does not rely on boxed Boolean identity.
        final boolean headerFromData = Boolean.TRUE.equals(processContext.getProperty(GET_CSV_HEADER_DEFINITION_FROM_INPUT).asBoolean());

        final AtomicReference<String> header = new AtomicReference<>();
        if (headerFromData) {
            processSession.read(flowFile, new InputStreamCallback() {
                @Override
                public void process(InputStream inputStream) throws IOException {
                    // Decode the header with the configured charset (the same one handed to the CSV reader)
                    // instead of the platform default; try-with-resources closes the reader on every path.
                    try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, charsetName))) {
                        header.set(reader.readLine());
                    }
                }
            });
        } else {
            header.set(processContext.getProperty(CSV_HEADER_DEFINITION).evaluateAttributeExpressions(flowFile).getValue());
        }

        final CSVProperties csvProperties = new CSVProperties.Builder()
                .charset(charsetName)
                .delimiter(processContext.getProperty(DELIMITER).evaluateAttributeExpressions(flowFile).getValue())
                .quote(processContext.getProperty(QUOTE_STRING).evaluateAttributeExpressions(flowFile).getValue())
                .escape(processContext.getProperty(ESCAPE_STRING).evaluateAttributeExpressions(flowFile).getValue())
                .linesToSkip(processContext.getProperty(HEADER_LINE_SKIP_COUNT).evaluateAttributeExpressions(flowFile).asInteger().intValue())
                .header(header.get())
                .hasHeader(headerFromData)
                .build();

        final AtomicReference<String> schemaText = new AtomicReference<>();
        processSession.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream inputStream) throws IOException {
                final String recordName = processContext.getProperty(InferAvroSchema.RECORD_NAME).evaluateAttributeExpressions(flowFile).getValue();
                schemaText.set(CSVUtil.inferSchema(recordName, inputStream, csvProperties)
                        .toString(processContext.getProperty(InferAvroSchema.PRETTY_AVRO_OUTPUT).asBoolean().booleanValue()));
            }
        });
        return schemaText.get();
    }

    /**
     * Infers an Avro schema from JSON content.
     *
     * @param flowFile       FlowFile whose content is analysed and whose attributes feed Expression Language
     * @param processContext provides "Avro Record Name", "Number Of Records To Analyze" and "Pretty Avro Output"
     * @param processSession session used to read the FlowFile content
     * @return the schema text, formatted according to "Pretty Avro Output"
     */
    private String inferAvroSchemaFromJSON(final FlowFile flowFile, final ProcessContext processContext, ProcessSession processSession) {
        final AtomicReference<String> schemaText = new AtomicReference<>();
        final InputStreamCallback inferFromContent = new InputStreamCallback() {
            public void process(InputStream inputStream) throws IOException {
                // Properties are evaluated inside the callback, in the same order as before.
                final String recordName = processContext.getProperty(InferAvroSchema.RECORD_NAME).evaluateAttributeExpressions(flowFile).getValue();
                final int recordsToAnalyze = processContext.getProperty(InferAvroSchema.NUM_RECORDS_TO_ANALYZE).evaluateAttributeExpressions(flowFile).asInteger().intValue();
                schemaText.set(JsonUtil.inferSchema(inputStream, recordName, recordsToAnalyze)
                        .toString(processContext.getProperty(InferAvroSchema.PRETTY_AVRO_OUTPUT).asBoolean().booleanValue()));
            }
        };
        processSession.read(flowFile, inferFromContent);
        return schemaText.get();
    }

    /**
     * Chooses JSON or CSV inference from the FlowFile's {@code mime.type} attribute.
     *
     * @param flowFile       FlowFile whose {@code mime.type} attribute and content are examined
     * @param processContext provides the configured property values
     * @param processSession session used to read the FlowFile content
     * @return the inferred schema text, or an empty string when the attribute is missing or names an
     *         unsupported type
     */
    private String inferAvroSchemaFromMimeType(FlowFile flowFile, ProcessContext processContext, ProcessSession processSession) {
        final String mimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
        if (mimeType == null) {
            // No attribute to go on; an empty result makes the caller treat the content as unsupported.
            return "";
        }

        // A plain string switch replaces the hashCode-based dispatch emitted by the decompiler,
        // which assigned integers to a boolean and repeated a case label.
        switch (mimeType) {
            case JSON_MIME_TYPE:
                getLogger().debug("Inferred content type as JSON from \"{}\" value of \"{}\"", new Object[]{CoreAttributes.MIME_TYPE.key(), mimeType});
                return inferAvroSchemaFromJSON(flowFile, processContext, processSession);
            case CSV_MIME_TYPE:
                getLogger().debug("Inferred content type as CSV from \"{}\" value of \"{}\"", new Object[]{CoreAttributes.MIME_TYPE.key(), mimeType});
                return inferAvroSchemaFromCSV(flowFile, processContext, processSession);
            default:
                getLogger().warn("Unable to infer Avro Schema from {} because its mime type is {},  which is not supported by this Processor", new Object[]{flowFile, mimeType});
                return "";
        }
    }

    /**
     * Applies Java-style unescaping to inputs longer than one character; shorter inputs are
     * returned unchanged.
     *
     * @param str text to unescape; must not be null
     * @return the unescaped text, or {@code str} itself when its length is at most one
     */
    public static String unescapeString(String str) {
        return str.length() > 1 ? StringEscapeUtils.unescapeJava(str) : str;
    }
}
