package co.cask.cdap.template.etl.transform;

import co.cask.cdap.api.annotation.Description;
import co.cask.cdap.api.annotation.Name;
import co.cask.cdap.api.annotation.Plugin;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.templates.plugins.PluginConfig;
import co.cask.cdap.template.etl.api.Emitter;
import co.cask.cdap.template.etl.api.Transform;
import co.cask.cdap.template.etl.api.TransformContext;
import java.nio.ByteBuffer;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import net.sf.uadetector.ReadableUserAgent;
import net.sf.uadetector.service.UADetectorServiceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Transform that extracts the request URI, client IP, browser, device category, HTTP status code and
 * timestamp from a raw access-log line and emits them as a record conforming to {@link #LOG_SCHEMA}.
 *
 * <p>Three input formats are recognised, selected by the {@code logFormat} plugin property:
 * {@code "S3"}, {@code "CLF"} and {@code "Cloudfront"}. Lines that cannot be parsed are dropped with
 * a debug-level log message rather than failing the whole run.
 *
 * <p>NOTE(review): the date formatters are per-instance {@link SimpleDateFormat} objects, which are
 * not thread-safe; this assumes a single transform instance is never invoked concurrently — confirm.
 */
@Name("LogParser")
@Description("Parses logs from any input source for relevant information such as URI, IP, Browser, Device, HTTP status code, and timestamp.")
@Plugin(type = "transform")
public class LogParserTransform extends Transform<StructuredRecord, StructuredRecord> {
    private static final Logger LOG = LoggerFactory.getLogger(LogParserTransform.class);

    private static final String LOG_FORMAT_DESCRIPTION = "Log format to parse. Currently supports S3, CLF, and Cloudfront formats.";
    private static final String INPUT_NAME_DESCRIPTION = "Name of the field in the input schema which encodes the log information. The given field must be of type String or Bytes.";

    /** Number of capture groups expected from {@link #S3_LOG_PATTERN}. */
    private static final int S3_REGEX_LENGTH = 18;
    /** Number of capture groups expected from {@link #CLF_LOG_PATTERN}. */
    private static final int CLF_REGEX_LENGTH = 9;
    /** Minimum number of tab-separated fields needed to read the user-agent column (index 10). */
    private static final int CLOUDFRONT_MIN_FIELDS = 11;

    private static final String S3_LOG = "S3";
    private static final String CLF_LOG = "CLF";
    private static final String CLOUDFRONT_LOG = "Cloudfront";

    /** Schema of every record emitted by this transform. */
    private static final Schema LOG_SCHEMA = Schema.recordOf("event", new Schema.Field[]{
        Schema.Field.of("uri", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("ip", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("browser", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("device", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("httpStatus", Schema.of(Schema.Type.INT)),
        Schema.Field.of("ts", Schema.of(Schema.Type.LONG))
    });

    private static final Pattern CLF_LOG_PATTERN = Pattern.compile("^([\\d.]+|[:][:][\\d]) (\\S+) (\\S+) \\[([^\\]]+)\\] \"([^\"]+)\" (\\d{3}) ([-\"\\d]+) \"([^\"]+)\" \"([^\"]+)\"");
    private static final Pattern S3_LOG_PATTERN = Pattern.compile("^(\\S+) (\\S+) \\[(\\p{Print}+)\\] ([\\d.]+|[:][:][\\d]) (\\S+) (\\S+) (\\S+) (\\S+) \"([^\"]+)\" (\\d{3}) (\\p{Print}+) ([-\"\\d]+) ([-\"\\d]+) ([\\d]+) ([-\"\\d]+) \"(\\p{Print}+)\" \"([^\"]+)\" (\\p{Print}+)");
    /** Splits a request line into its first two whitespace-separated tokens; group 2 is used as the URI. */
    private static final Pattern REQUEST_PAGE_PATTERN = Pattern.compile("(\\S+)\\s(\\S+).*");

    // Capture-group indices, in the order consumed by parseRequest:
    // {request line, timestamp, client IP, user agent, HTTP status}.
    private static final int[] S3_INDICES = {9, 3, 4, 17, 10};
    private static final int[] CLF_INDICES = {5, 4, 1, 9, 6};

    private final LogParserConfig config;

    // An explicit English locale is required: the "MMM" month abbreviation (e.g. "Oct") would otherwise
    // be parsed using the JVM default locale and fail on non-English hosts.
    private final SimpleDateFormat sdfStrftime = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.ENGLISH);
    private final SimpleDateFormat sdfCloudfront = new SimpleDateFormat("yyyy-MM-dd:HH:mm:ss z", Locale.ENGLISH);

    /** Plugin configuration: which log format to parse and which input field carries the log line. */
    public static class LogParserConfig extends PluginConfig {

        @Name("logFormat")
        @Description(LogParserTransform.LOG_FORMAT_DESCRIPTION)
        private String logFormat;

        @Name("inputName")
        @Description(LogParserTransform.INPUT_NAME_DESCRIPTION)
        private String inputName;

        /**
         * @param str  value for the {@code logFormat} property
         * @param str2 value for the {@code inputName} property
         */
        public LogParserConfig(String str, String str2) {
            this.logFormat = str;
            this.inputName = str2;
        }
    }

    /**
     * @param logParserConfig configuration supplying the log format and input field name
     */
    public LogParserTransform(LogParserConfig logParserConfig) {
        this.config = logParserConfig;
    }

    /**
     * Validates that the configured log format is one of the supported values.
     *
     * @param transformContext context supplied by the framework (not used here)
     * @throws IllegalStateException if the configured log format is not S3, CLF or Cloudfront
     */
    public void initialize(TransformContext transformContext) throws Exception {
        String format = this.config.logFormat;
        boolean supported = S3_LOG.equals(format) || CLF_LOG.equals(format) || CLOUDFRONT_LOG.equals(format);
        if (!supported) {
            LOG.error("Log format not currently supported.");
            throw new IllegalStateException("Unsupported log format: " + format);
        }
    }

    /**
     * Parses the configured input field of the record as a log line and emits one output record if
     * parsing succeeds. Unparseable or unreadable input emits nothing.
     *
     * @param structuredRecord input record containing the raw log line
     * @param emitter          receives the parsed record
     */
    @Override
    public void transform(StructuredRecord structuredRecord, Emitter<StructuredRecord> emitter) throws Exception {
        String log = getLog(structuredRecord);
        if (log == null) {
            LOG.debug("Couldn't read schema, log message was null");
            return;
        }

        StructuredRecord parsed;
        if (S3_LOG.equals(this.config.logFormat)) {
            Matcher s3Matcher = S3_LOG_PATTERN.matcher(log);
            if (!s3Matcher.matches() || s3Matcher.groupCount() < S3_REGEX_LENGTH) {
                LOG.debug("Couldn't parse log because log did not match the S3 format, log: {}", log);
                return;
            }
            parsed = parseRequest(s3Matcher, S3_INDICES);
        } else if (CLF_LOG.equals(this.config.logFormat)) {
            Matcher clfMatcher = CLF_LOG_PATTERN.matcher(log);
            if (!clfMatcher.matches() || clfMatcher.groupCount() < CLF_REGEX_LENGTH) {
                LOG.debug("Couldn't parse log because the log did not match the CLF format. log: {}", log);
                return;
            }
            parsed = parseRequest(clfMatcher, CLF_INDICES);
        } else {
            parsed = parseCloudfront(log);
        }

        if (parsed != null) {
            emitter.emit(parsed);
        }
    }

    /**
     * Reads the configured input field from the record and returns it as a string.
     *
     * @return the log line, or {@code null} if the field is missing, has an unsupported type, or holds
     *         a null value
     */
    @Nullable
    private String getLog(StructuredRecord structuredRecord) {
        String inputName = this.config.inputName;
        Schema.Field field = structuredRecord.getSchema().getField(inputName);
        if (field == null) {
            LOG.debug("Invalid inputName, no known inputField matches given input of {}", inputName);
            return null;
        }

        Schema schema = field.getSchema();
        if (schema.isNullableSimple()) {
            schema = schema.getNonNullable();
        }
        Schema.Type type = schema.getType();
        boolean isString = Schema.Type.STRING.equals(type);
        if (!isString && !Schema.Type.BYTES.equals(type)) {
            LOG.error("Unsupported inputType in schema, only Schema.Type.BYTES and Schema.Type.STRING are supported. InputType: {}", type.toString());
            return null;
        }

        Object value = structuredRecord.get(inputName);
        if (value == null) {
            // A nullable field may legitimately hold null; treat it like an unreadable log line
            // instead of dereferencing it below.
            return null;
        }
        if (isString) {
            return (String) value;
        }
        if (value instanceof ByteBuffer) {
            return Bytes.toString((ByteBuffer) value);
        }
        if (value instanceof byte[]) {
            return Bytes.toString((byte[]) value);
        }
        LOG.debug("Not a byte type, type is {}", value.getClass().toString());
        return null;
    }

    /**
     * Builds an output record from a successfully matched S3 or CLF log line.
     *
     * @param matcher matcher that has already matched the full log line
     * @param iArr    capture-group indices in the order
     *                {request line, timestamp, client IP, user agent, HTTP status}
     * @return the parsed record, or {@code null} if the request line does not contain a URI token
     */
    @Nullable
    private StructuredRecord parseRequest(Matcher matcher, int[] iArr) {
        String request = matcher.group(iArr[0]);
        Matcher requestMatcher = REQUEST_PAGE_PATTERN.matcher(request);
        if (!requestMatcher.matches() || requestMatcher.groupCount() < 2) {
            LOG.debug("Couldn't parse uri because request does not match request pattern, request: {}", request);
            return null;
        }
        String uri = requestMatcher.group(2);
        long timestamp = parseTimestamp(this.sdfStrftime, matcher.group(iArr[1]));
        String ip = matcher.group(iArr[2]);
        String userAgent = matcher.group(iArr[3]);
        // The status group is constrained to exactly three digits by the log patterns, so parseInt cannot fail.
        int httpStatus = Integer.parseInt(matcher.group(iArr[4]));
        return buildRecord(uri, ip, userAgent, httpStatus, timestamp);
    }

    /**
     * Parses one tab-separated Cloudfront log line.
     *
     * <p>Columns read: 0 and 1 (date and time, combined and interpreted as UTC), 4 (client IP),
     * 7 (URI), 8 (HTTP status) and 10 (user agent).
     *
     * @return the parsed record, or {@code null} for comment lines and lines that are too short or
     *         carry a non-numeric status
     */
    @Nullable
    private StructuredRecord parseCloudfront(String log) {
        if (log.startsWith("#")) {
            LOG.trace("Log is a comment. Ignoring...");
            return null;
        }
        String[] fields = log.split("\\t");
        if (fields.length < CLOUDFRONT_MIN_FIELDS) {
            // Blank or truncated lines would otherwise fail with ArrayIndexOutOfBoundsException.
            LOG.debug("Couldn't parse log because the log did not match the Cloudfront format. log: {}", log);
            return null;
        }
        int httpStatus;
        try {
            httpStatus = Integer.parseInt(fields[8]);
        } catch (NumberFormatException e) {
            LOG.debug("Couldn't parse HTTP status from the Cloudfront log, status: {}", fields[8]);
            return null;
        }
        long timestamp = parseTimestamp(this.sdfCloudfront, String.format("%s:%s UTC", fields[0], fields[1]));
        return buildRecord(fields[7], fields[4], fields[10], httpStatus, timestamp);
    }

    /** Parses {@code text} with {@code format}, falling back to the current time if it cannot be parsed. */
    private static long parseTimestamp(SimpleDateFormat format, String text) {
        try {
            return format.parse(text).getTime();
        } catch (ParseException e) {
            LOG.debug("Couldn't parse time from the input record, using current timestamp instead. Exception: {}", e.getMessage());
            return System.currentTimeMillis();
        }
    }

    /** Resolves browser and device from the user-agent string and assembles the output record. */
    private static StructuredRecord buildRecord(String uri, String ip, String userAgent, int httpStatus, long timestamp) {
        ReadableUserAgent agent = UADetectorServiceFactory.getResourceModuleParser().parse(userAgent);
        String browser = agent.getFamily().getName();
        String device = agent.getDeviceCategory().getCategory().getName();
        return StructuredRecord.builder(LOG_SCHEMA)
            .set("uri", uri)
            .set("ip", ip)
            .set("browser", browser)
            .set("device", device)
            .set("httpStatus", Integer.valueOf(httpStatus))
            .set("ts", Long.valueOf(timestamp))
            .build();
    }

    /**
     * Erased-signature overload that casts its arguments and forwards to
     * {@link #transform(StructuredRecord, Emitter)}.
     *
     * <p>NOTE(review): this looks like a compiler-generated bridge method surfaced by decompilation;
     * javac normally emits it automatically — confirm whether the explicit declaration is needed.
     */
    public /* bridge */ /* synthetic */ void transform(Object obj, Emitter emitter) throws Exception {
        transform((StructuredRecord) obj, (Emitter<StructuredRecord>) emitter);
    }
}
