package org.apache.stormcrawler.aws.bolt;

import com.amazonaws.regions.RegionUtils;
import com.amazonaws.services.cloudsearchdomain.AmazonCloudSearchDomainClient;
import com.amazonaws.services.cloudsearchdomain.model.ContentType;
import com.amazonaws.services.cloudsearchdomain.model.DocumentServiceWarning;
import com.amazonaws.services.cloudsearchdomain.model.UploadDocumentsRequest;
import com.amazonaws.services.cloudsearchdomain.model.UploadDocumentsResult;
import com.amazonaws.services.cloudsearchv2.AmazonCloudSearchClient;
import com.amazonaws.services.cloudsearchv2.model.DescribeDomainsRequest;
import com.amazonaws.services.cloudsearchv2.model.DescribeIndexFieldsRequest;
import com.amazonaws.services.cloudsearchv2.model.DomainStatus;
import com.amazonaws.services.cloudsearchv2.model.IndexFieldStatus;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.attribute.FileAttribute;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.Config;
import org.apache.storm.metric.api.MultiCountMetric;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.TupleUtils;
import org.apache.stormcrawler.Metadata;
import org.apache.stormcrawler.indexing.AbstractIndexerBolt;
import org.apache.stormcrawler.persistence.Status;
import org.apache.stormcrawler.util.ConfUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/stormcrawler/aws/bolt/CloudSearchIndexerBolt.class */
/**
 * Indexer bolt that turns incoming tuples into CloudSearch "add" operations, accumulates them in
 * a JSON array and sends the array as one batch.
 *
 * <p>A batch is sent when adding a document would reach {@link #MAX_SIZE_BATCH_BYTES}, when the
 * configured number of documents per batch is reached, or when a tick tuple arrives after the
 * configured buffering time has elapsed. Tuples are acked (with a {@code FETCHED} status emitted
 * on the "status" stream) only once their batch has been sent, and failed if sending the batch
 * raised an exception.
 *
 * <p>When the batch-dump option is enabled, batches are written to temporary files instead of
 * being uploaded and no CloudSearch client is created.
 */
public class CloudSearchIndexerBolt extends AbstractIndexerBolt {
    /** Upper bound, in UTF-8 bytes, for a serialised batch. */
    private static final int MAX_SIZE_BATCH_BYTES = 5242880;

    /** Upper bound, in UTF-8 bytes, for a single serialised document. */
    private static final int MAX_SIZE_DOC_BYTES = 1048576;

    /** Shared JSON mapper; it is only used to create nodes and serialise them. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    public static final Logger LOG = LoggerFactory.getLogger(CloudSearchIndexerBolt.class);

    /**
     * Used to validate values of date fields. Kept per instance because {@link SimpleDateFormat}
     * is not safe for concurrent use and several bolt instances may live in the same JVM.
     */
    private final SimpleDateFormat dateFormat =
            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);

    /** Document service client; stays {@code null} when batches are dumped to temp files. */
    private AmazonCloudSearchDomainClient client;

    /** JSON array under construction; always starts with '['. */
    private StringBuffer buffer;

    private OutputCollector _collector;
    private MultiCountMetric eventCounter;

    /** Maximum number of documents per batch; values <= 0 disable the limit. */
    private int maxDocsInBatch = -1;

    private int numDocsInBatch = 0;

    /** Maximum time, in seconds, a batch is buffered before a tick tuple flushes it. */
    private int maxTimeBuffered = 10;

    private boolean dumpBatchFilesToTemp = false;

    /** Index field name -> index field type, as reported by the CloudSearch domain. */
    private Map<String, String> csfields = new HashMap<>();

    private long timeLastBatchSent = System.currentTimeMillis();

    /** Tuples whose documents are in the current, not yet sent, batch. */
    private List<Tuple> unacked = new ArrayList<>();

    /**
     * Reads the configuration, registers the metric and, unless batches are dumped to temp
     * files, resolves the CloudSearch domain matching the configured endpoint, loads its index
     * field definitions and creates the document service client.
     *
     * @param map topology configuration
     * @param topologyContext context used to register the event counter
     * @param outputCollector collector used to emit statuses and ack/fail tuples
     * @throws RuntimeException if the endpoint is missing or no domain matches it
     */
    public void prepare(Map<String, Object> map, TopologyContext topologyContext, OutputCollector outputCollector) {
        super.prepare(map, topologyContext, outputCollector);
        this._collector = outputCollector;
        this.eventCounter = topologyContext.registerMetric("CloudSearchIndexer", new MultiCountMetric(), 10);
        this.maxTimeBuffered = ConfUtils.getInt(map, CloudSearchConstants.MAX_TIME_BUFFERED, 10);
        this.maxDocsInBatch = ConfUtils.getInt(map, CloudSearchConstants.MAX_DOCS_BATCH, -1);
        resetBatch();
        this.dumpBatchFilesToTemp = ConfUtils.getBoolean(map, CloudSearchConstants.BATCH_DUMP, false);
        if (this.dumpBatchFilesToTemp) {
            // No connection to CloudSearch is needed when only dumping batches to files.
            return;
        }
        String endpoint = ConfUtils.getString(map, CloudSearchConstants.ENDPOINT);
        if (StringUtils.isBlank(endpoint)) {
            LOG.error("Missing CloudSearch endpoint");
            throw new RuntimeException("Missing CloudSearch endpoint");
        }
        String region = ConfUtils.getString(map, CloudSearchConstants.REGION);
        AmazonCloudSearchClient configClient = new AmazonCloudSearchClient();
        try {
            if (StringUtils.isNotBlank(region)) {
                configClient.setRegion(RegionUtils.getRegion(region));
            }
            String domainName = findDomainName(configClient, endpoint);
            if (StringUtils.isBlank(domainName)) {
                throw new RuntimeException("No domain name found for CloudSearch endpoint");
            }
            DescribeIndexFieldsRequest fieldsRequest = new DescribeIndexFieldsRequest().withDomainName(domainName);
            for (IndexFieldStatus indexFieldStatus : configClient.describeIndexFields(fieldsRequest).getIndexFields()) {
                String indexFieldName = indexFieldStatus.getOptions().getIndexFieldName();
                String indexFieldType = indexFieldStatus.getOptions().getIndexFieldType();
                LOG.info("CloudSearch index name {} of type {}", indexFieldName, indexFieldType);
                this.csfields.put(indexFieldName, indexFieldType);
            }
        } finally {
            // The configuration client is only needed during set-up; release its resources.
            configClient.shutdown();
        }
        this.client = new AmazonCloudSearchDomainClient();
        this.client.setEndpoint(endpoint);
    }

    /** Returns the name of the domain whose document endpoint equals {@code endpoint}, or null. */
    private String findDomainName(AmazonCloudSearchClient configClient, String endpoint) {
        for (DomainStatus domainStatus : configClient.describeDomains(new DescribeDomainsRequest()).getDomainStatusList()) {
            // Guard against a domain that does not report a document service.
            if (domainStatus.getDocService() != null
                    && endpoint.equals(domainStatus.getDocService().getEndpoint())) {
                return domainStatus.getDomainName();
            }
        }
        return null;
    }

    /**
     * Returns the document id derived from the given URL by {@code CloudSearchUtils.getID}.
     *
     * @param metadata metadata of the document (not used by this implementation)
     * @param str URL value the id is computed from
     * @return the document id
     */
    protected String getDocumentID(Metadata metadata, String str) {
        return CloudSearchUtils.getID(str);
    }

    /**
     * Handles a tuple: tick tuples flush the batch once the buffering time has elapsed; other
     * tuples are filtered, converted to a JSON "add" operation and appended to the batch.
     *
     * <p>Filtered documents are acked straight away with a {@code FETCHED} status; documents
     * whose JSON could not be built are acked with an {@code ERROR} status.
     *
     * @param tuple tick tuple, or tuple carrying the "url", "metadata" and "text" fields
     */
    public void execute(Tuple tuple) {
        if (TupleUtils.isTick(tuple)) {
            // 1000L forces long arithmetic so that large buffering times cannot overflow.
            if (System.currentTimeMillis() - this.timeLastBatchSent >= this.maxTimeBuffered * 1000L) {
                sendBatch();
            }
            this._collector.ack(tuple);
            return;
        }
        String url = tuple.getStringByField("url");
        String normalisedUrl = valueForURL(tuple);
        Metadata metadata = (Metadata) tuple.getValueByField("metadata");
        String text = tuple.getStringByField("text");
        if (!filterDocument(metadata)) {
            this.eventCounter.scope("Filtered").incrBy(1L);
            this._collector.emit("status", tuple, new Values(url, metadata, Status.FETCHED));
            this._collector.ack(tuple);
            return;
        }
        try {
            ObjectNode doc = OBJECT_MAPPER.createObjectNode();
            doc.put("type", "add");
            doc.put("id", getDocumentID(metadata, normalisedUrl));
            ObjectNode fields = OBJECT_MAPPER.createObjectNode();
            addMetadataFields(fields, metadata, url);
            String urlFieldName = fieldNameForURL();
            if (StringUtils.isNotBlank(urlFieldName)) {
                String cleanUrlFieldName = CloudSearchUtils.cleanFieldName(urlFieldName);
                if (isIndexable(cleanUrlFieldName)) {
                    fields.put(cleanUrlFieldName, CloudSearchUtils.stripNonCharCodepoints(normalisedUrl));
                }
            }
            String textFieldName = fieldNameForText();
            if (StringUtils.isNotBlank(textFieldName)) {
                String cleanTextFieldName = CloudSearchUtils.cleanFieldName(textFieldName);
                if (isIndexable(cleanTextFieldName)) {
                    fields.put(cleanTextFieldName, CloudSearchUtils.stripNonCharCodepoints(trimText(text)));
                }
            }
            doc.set("fields", fields);
            addToBatch(OBJECT_MAPPER.writeValueAsString(doc), url, tuple);
        } catch (Exception e) {
            LOG.error("Exception caught while building JSON object", e);
            this._collector.emit("status", tuple, new Values(url, metadata, Status.ERROR));
            this._collector.ack(tuple);
        }
    }

    /** Tells whether a (cleaned) field name is known to the domain, or batches are only dumped. */
    private boolean isIndexable(String cleanFieldName) {
        return this.dumpBatchFilesToTemp || this.csfields.get(cleanFieldName) != null;
    }

    /**
     * Copies the metadata selected by {@code filterMetadata} into {@code fields}, one JSON array
     * per field. Fields unknown to the domain are skipped, multiple values are reduced to the
     * first one unless the field type contains "-array", and date values that cannot be parsed
     * are dropped.
     */
    private void addMetadataFields(ObjectNode fields, Metadata metadata, String url) {
        for (Map.Entry<String, String[]> entry : filterMetadata(metadata).entrySet()) {
            String fieldName = CloudSearchUtils.cleanFieldName(entry.getKey());
            String fieldType = this.csfields.get(fieldName);
            if (fieldType == null && !this.dumpBatchFilesToTemp) {
                LOG.info("Field {} not defined in CloudSearch domain for {} - skipping.", fieldName, url);
                continue;
            }
            String[] values = entry.getValue();
            if (values.length > 1 && !StringUtils.containsIgnoreCase(fieldType, "-array")) {
                LOG.info("{} values found for field {} of type {} - keeping only the first one. {}", values.length, fieldName, fieldType, url);
                values = new String[] {values[0]};
            }
            boolean isDateField = StringUtils.containsIgnoreCase(fieldType, "date");
            ArrayNode arrayNode = fields.arrayNode();
            for (String value : values) {
                if (isDateField) {
                    try {
                        this.dateFormat.parse(value);
                    } catch (ParseException e) {
                        // Skip the value rather than indexing a date reported as unparsable.
                        LOG.info("Unparsable date {}", value);
                        continue;
                    }
                    arrayNode.add(value);
                } else {
                    arrayNode.add(CloudSearchUtils.stripNonCharCodepoints(value));
                }
            }
            if (arrayNode.size() > 0) {
                // Metadata belongs under "fields", next to the URL and text fields.
                fields.set(fieldName, arrayNode);
            }
        }
    }

    /**
     * Appends a serialised document to the current batch, sending the batch first when the
     * document would not fit, and sending it afterwards when the configured number of documents
     * per batch has been reached. Documents larger than {@link #MAX_SIZE_DOC_BYTES} are not
     * batched: an {@code ERROR} status is emitted and the tuple is acked.
     */
    private void addToBatch(String currentDoc, String url, Tuple tuple) {
        int currentDocLength = currentDoc.getBytes(StandardCharsets.UTF_8).length;
        if (currentDocLength > MAX_SIZE_DOC_BYTES) {
            LOG.error("Doc too large. currentDoc.length {} : {}", currentDocLength, url);
            // Resolve the tuple here; otherwise it would stay pending until the topology timeout.
            this._collector.emit("status", tuple, new Values(url, tuple.getValueByField("metadata"), Status.ERROR));
            this._collector.ack(tuple);
            return;
        }
        int bufferLength = this.buffer.toString().getBytes(StandardCharsets.UTF_8).length;
        LOG.debug("currentDoc.length {}, buffer length {}", currentDocLength, bufferLength);
        // The extra 2 bytes account for the separating comma and the closing bracket.
        if (currentDocLength + 2 + bufferLength >= MAX_SIZE_BATCH_BYTES) {
            sendBatch();
        }
        if (this.numDocsInBatch != 0) {
            this.buffer.append(',');
        }
        this.buffer.append(currentDoc);
        this.unacked.add(tuple);
        this.numDocsInBatch++;
        if (this.maxDocsInBatch > 0 && this.numDocsInBatch == this.maxDocsInBatch) {
            sendBatch();
        }
    }

    /**
     * Sends the buffered documents, if any, either to a temporary file (dump mode) or to
     * CloudSearch, then starts a new empty batch. The time of the last batch is updated even
     * when there is nothing to send.
     */
    public void sendBatch() {
        this.timeLastBatchSent = System.currentTimeMillis();
        if (this.numDocsInBatch == 0) {
            return;
        }
        this.buffer.append(']');
        LOG.info("Sending {} docs to CloudSearch", this.numDocsInBatch);
        byte[] bytes = this.buffer.toString().getBytes(StandardCharsets.UTF_8);
        try {
            if (this.dumpBatchFilesToTemp) {
                dumpBatch(bytes);
            } else {
                uploadBatch(bytes);
            }
        } finally {
            // Whatever happened, the next document must start a fresh batch.
            resetBatch();
        }
    }

    /** Writes the batch to a temp file and acks its tuples; fails them if the write fails. */
    private void dumpBatch(byte[] bytes) {
        try {
            File file = Files.createTempFile("CloudSearch_", ".json").toFile();
            FileUtils.writeByteArrayToFile(file, bytes);
            LOG.info("Wrote batch file {}", file.getName());
            ackBatch();
        } catch (IOException e) {
            LOG.error("Exception while generating batch file", e);
            failBatch();
        }
    }

    /** Uploads the batch to CloudSearch and acks its tuples; fails them on any exception. */
    private void uploadBatch(byte[] bytes) {
        try (ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes)) {
            UploadDocumentsRequest request = new UploadDocumentsRequest();
            request.setContentLength(Long.valueOf(bytes.length));
            request.setContentType(ContentType.Applicationjson);
            request.setDocuments(inputStream);
            UploadDocumentsResult result = this.client.uploadDocuments(request);
            LOG.info(result.getStatus());
            for (DocumentServiceWarning warning : result.getWarnings()) {
                LOG.info(warning.getMessage());
            }
            if (!result.getWarnings().isEmpty()) {
                this.eventCounter.scope("Warnings").incrBy(result.getWarnings().size());
            }
            this.eventCounter.scope("Added").incrBy(result.getAdds().longValue());
            ackBatch();
        } catch (Exception e) {
            LOG.error("Exception while sending batch", e);
            LOG.error(this.buffer.toString());
            failBatch();
        }
    }

    /** Emits a FETCHED status for every tuple of the batch, acks it and forgets the tuples. */
    private void ackBatch() {
        for (Tuple tuple : this.unacked) {
            String url = tuple.getStringByField("url");
            Metadata metadata = (Metadata) tuple.getValueByField("metadata");
            this._collector.emit("status", tuple, new Values(url, metadata, Status.FETCHED));
            this._collector.ack(tuple);
        }
        this.unacked.clear();
    }

    /** Fails every tuple of the batch and forgets the tuples. */
    private void failBatch() {
        for (Tuple tuple : this.unacked) {
            this._collector.fail(tuple);
        }
        this.unacked.clear();
    }

    /** Starts a new, empty JSON array and resets the document counter. */
    private void resetBatch() {
        this.buffer = new StringBuffer(MAX_SIZE_BATCH_BYTES).append('[');
        this.numDocsInBatch = 0;
    }

    /** Sends whatever is still buffered and shuts the CloudSearch client down, if there is one. */
    public void cleanup() {
        sendBatch();
        // The client is never created when batches are dumped to temp files.
        if (this.client != null) {
            this.client.shutdown();
        }
    }

    /**
     * Requests a tick tuple every second so that buffered documents get flushed on time.
     *
     * @return component configuration setting {@code topology.tick.tuple.freq.secs} to 1
     */
    public Map<String, Object> getComponentConfiguration() {
        Config config = new Config();
        config.put("topology.tick.tuple.freq.secs", 1);
        return config;
    }
}
