/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.reader.xml;

import de.julielab.costosys.configuration.FieldConfig;
import de.julielab.costosys.dbconnection.CoStoSysConnection;
import de.julielab.costosys.dbconnection.DataBaseConnector;
import de.julielab.jcore.reader.db.DBMultiplier;
import de.julielab.jcore.reader.db.DBReader;
import de.julielab.jcore.reader.xml.CasPopulationException;
import de.julielab.jcore.reader.xml.CasPopulator;
import de.julielab.jcore.reader.xml.Initializer;
import de.julielab.jcore.reader.xml.Row2CasMapper;
import de.julielab.jcore.reader.xmlmapper.mapper.XMLMapper;
import de.julielab.jcore.types.casflow.ToVisit;
import de.julielab.jcore.types.casmultiplier.RowBatch;
import de.julielab.jcore.types.ext.DBProcessingMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.AbstractCas;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * CAS multiplier that turns the rows announced by a {@link RowBatch} into populated CASes by
 * running the row's XML payload through the {@link XMLMapper}.
 *
 * <p>Optionally, the SHA-256 hash of the document text of each new CAS is compared with a hash
 * stored in a separate document table. When both hashes are equal, the
 * {@link DBProcessingMetaData} is flagged accordingly and/or a {@link ToVisit} annotation
 * carrying the configured delegate keys is added to the CAS.
 */
@ResourceMetaData(name="JCoRe XML Database Multiplier", description="This CAS multiplier receives information about documents to be read from an instance of the XML Database Multiplier reader from the jcore-db-reader project. The multiplier employs the jcore-xml-mapper to map the document XML structure into CAS instances. It also supports additional tables sent by the DB Multiplier Reader that are then joined to the main table. This mechanism is used to load separate data from additional database tables and populate the CAS with them via the 'RowMapping' parameter. This component is part of the Jena Document Information System, JeDIS.", vendor="JULIE Lab Jena, Germany", copyright="JULIE Lab Jena, Germany")
@TypeCapability(inputs={"de.julielab.jcore.types.casmultiplier.RowBatch"}, outputs={"de.julielab.jcore.types.casflow.ToVisit"})
public class XMLDBMultiplier
extends DBMultiplier {
    public static final String PARAM_ROW_MAPPING = "RowMapping";
    public static final String PARAM_MAPPING_FILE = "MappingFile";
    public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
    public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
    public static final String PARAM_TABLE_DOCUMENT_SCHEMA = "DocumentTableSchema";
    public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
    public static final String PARAM_ADD_TO_VISIT_KEYS = "AddToVisitKeys";
    public static final String PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG = "AddUnchangedDocumentTextFlag";
    public static final String PARAM_TRUNCATE_AT_SIZE = "TruncateAtSize";
    private static final Logger log = LoggerFactory.getLogger(XMLDBMultiplier.class);
    protected XMLMapper xmlMapper;
    @ConfigurationParameter(name=PARAM_ROW_MAPPING, mandatory=false, description="In case that the CoStoSys active table schema specified more than two columns to be retrieved, the other columns need a mapping into the CAS.A mapping item has the following form: <column index>=<uima type>#<type feature>:<feature datatype>:defaultValue where the defaultValue is optional. Example: 2=de.julielab.jules.types.max_xmi_id#id:int:0 maps the content of the third (index 2, zero-based) retrieved column (may also belong to an additional table!) to feature \"id\" of the type \"d.j.j.t.max_xmi_id\" which is an int. In case there is no value returned from the database for a document, use a 0 as default.")
    protected String[] rowMappingArray;
    @ConfigurationParameter(name=PARAM_MAPPING_FILE, description="An XML mapping file following the specification required by the jcore-xml-mapper. The mapping file specifies how contents from an XML docuent are to be brought into the CAS.")
    protected String mappingFileStr;
    @ConfigurationParameter(name=PARAM_ADD_SHA_HASH, mandatory=false, description="For use with AnnotationDefinedFlowController and XMIDBWriter. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS is directly routed to the components specified in the ToVisitKeys parameter, skipping all other components. Note that this only works with AAEs where the first component is an 'AnnotationControlledFlow'. Additionally, the DBProcessingMetaData#hasDocumentHashChanged is set. This can be used by the XMIDBWriter to omit the reset of mirror subsets when updating the base document when the actual CAS text stayed the same.")
    private String documentItemToHash;
    @ConfigurationParameter(name=PARAM_TABLE_DOCUMENT, mandatory=false, description="For use with AnnotationDefinedFlowController. String parameter indicating the name of the table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")
    private String xmiStorageDataTable;
    @ConfigurationParameter(name=PARAM_TABLE_DOCUMENT_SCHEMA, mandatory=false, description="For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the DocumentTable parameter - adheres to. Only the primary key part is required for hash value retrieval.")
    private String xmiStorageDataTableSchema;
    @ConfigurationParameter(name=PARAM_TO_VISIT_KEYS, mandatory=false, description="For use with AnnotationDefinedFlowController. Specifies the delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. The task of the AnnotationDefinedFlowController is then to read those annotations and route the CAS accordingly. The parameter values will only be added to the CAS if AddToVisitKeys is set to true, the document text hash is available in the database (see the AddShaHash parameter) and the hash in the database equals to the hash of the current document in the pipeline.")
    private String[] toVisitKeys;
    @ConfigurationParameter(name=PARAM_ADD_TO_VISIT_KEYS, mandatory=false, description="Toggles the creation of annotations for the AnnotationDefinedFlowController. Only needed when such a flow controller is used in the pipeline. For details, see the description of ToVisitKeys.")
    private boolean addToVisitKeys;
    @ConfigurationParameter(name=PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG, mandatory=false, description="Toggles the addition of the 'document text is unchanged' flag. The value of this flag is determined via a SHA256 hash of the CAS document text. When DocumentTable and DocumentTableSchema are specified, the hash value of the document in storage is retrieved and compared to the current value. The flag is then set with respect to the comparison result.")
    private boolean addUnchangedDocumentTextFlag;
    @ConfigurationParameter(name=PARAM_TRUNCATE_AT_SIZE, mandatory=false, description="Specify size in bytes of the XML document size. If the document surpasses that size, it is not populated from XMI but given some placeholder information. This can be necessary when large documents cannot be handled by subsequent components in the pipeline.")
    private int truncationSize;
    private Row2CasMapper row2CasMapper;
    private CasPopulator casPopulator;
    /** Maps comma-joined primary key values to the hash stored in the document table; refreshed per batch. */
    private Map<String, String> docId2HashMap;
    /** Set once the lazily created {@link #row2CasMapper} and {@link #casPopulator} are available. */
    private boolean initialized;

    /**
     * Reads the configuration parameters, creates the XML mapper and validates that the document
     * table and its schema are either both given or both omitted when hash comparison is requested.
     *
     * @param aContext the UIMA context providing the configuration parameter values
     * @throws ResourceInitializationException if the superclass initialization fails or only one of
     *         {@value #PARAM_TABLE_DOCUMENT} and {@value #PARAM_TABLE_DOCUMENT_SCHEMA} is specified
     *         while hash comparison is requested
     */
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.mappingFileStr = (String) aContext.getConfigParameterValue(PARAM_MAPPING_FILE);
        this.rowMappingArray = (String[]) aContext.getConfigParameterValue(PARAM_ROW_MAPPING);
        this.xmiStorageDataTable = (String) aContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT);
        this.xmiStorageDataTableSchema = (String) aContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT_SCHEMA);
        this.documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
        this.toVisitKeys = (String[]) aContext.getConfigParameterValue(PARAM_TO_VISIT_KEYS);
        this.addToVisitKeys = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_ADD_TO_VISIT_KEYS)).orElse(false);
        this.addUnchangedDocumentTextFlag = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG)).orElse(false);
        this.truncationSize = Optional.ofNullable((Integer) aContext.getConfigParameterValue(PARAM_TRUNCATE_AT_SIZE)).orElse(Integer.MAX_VALUE);
        Initializer initializer = new Initializer(this.mappingFileStr, null, null);
        this.xmlMapper = initializer.getXmlMapper();
        this.initialized = false;

        // documentItemToHash always has a value at this point, so the configuration is only
        // inconsistent when exactly one of table and schema is given while a hash comparison
        // feature is switched on.
        boolean hashComparisonRequested = this.addToVisitKeys || this.addUnchangedDocumentTextFlag;
        boolean tableGiven = this.xmiStorageDataTable != null;
        boolean schemaGiven = this.xmiStorageDataTableSchema != null;
        if (hashComparisonRequested && tableGiven != schemaGiven) {
            String errorMsg = String.format("From the parameters '%s' and '%s' some are specified and some aren't. To activate hash value comparison in order to add aggregate component keys for CAS visit, specify all those parameters. Otherwise, specify none.", PARAM_TABLE_DOCUMENT, PARAM_TABLE_DOCUMENT_SCHEMA);
            log.error(errorMsg);
            throw new ResourceInitializationException(new IllegalArgumentException(errorMsg));
        }
    }

    /**
     * Lets the superclass handle the incoming batch and then loads the stored hashes for the
     * documents listed in the batch's {@link RowBatch} annotation.
     *
     * @param aJCas the CAS carrying exactly one {@link RowBatch}
     * @throws AnalysisEngineProcessException if the superclass fails or the hashes cannot be read
     */
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        super.process(aJCas);
        this.docId2HashMap = this.fetchCurrentHashesFromDatabase(JCasUtil.selectSingle(aJCas, RowBatch.class));
    }

    /**
     * Populates a fresh CAS with the next document of the current batch. If the document iterator
     * is exhausted, the empty CAS is returned unpopulated.
     *
     * @return the CAS obtained from {@code getEmptyJCas()}, populated if a document was available
     * @throws AnalysisEngineProcessException if lazy initialization or CAS population fails
     */
    public AbstractCas next() throws AnalysisEngineProcessException {
        JCas jCas = this.getEmptyJCas();
        try {
            if (this.documentDataIterator.hasNext()) {
                if (!this.initialized) {
                    this.initializeCasPopulation();
                }
                byte[][] documentData = (byte[][]) this.documentDataIterator.next();
                this.populateCas(jCas, documentData);
                this.setToVisitAnnotation(jCas);
            }
        }
        catch (Exception e) {
            log.error("Exception occurred: ", e);
            // A CAS taken from the pool that is not returned from next() must be handed back,
            // otherwise the pool runs dry after a few failures.
            jCas.release();
            throw e;
        }
        return jCas;
    }

    /**
     * Creates the row mapper and the CAS populator on first use and verifies that the configured
     * document table exists.
     */
    private void initializeCasPopulation() throws AnalysisEngineProcessException {
        try {
            this.row2CasMapper = new Row2CasMapper(this.rowMappingArray, () -> this.getAllRetrievedColumns());
        }
        catch (ResourceInitializationException e) {
            throw new AnalysisEngineProcessException(e);
        }
        if (this.xmiStorageDataTable != null && !this.dbc.withConnectionQueryBoolean(d -> d.tableExists(this.xmiStorageDataTable))) {
            throw new AnalysisEngineProcessException(new IllegalArgumentException("The data table " + this.xmiStorageDataTable + " to retrieve hash values from for document text change detection does not exist in the database: " + this.dbc.getDbURL()));
        }
        this.casPopulator = new CasPopulator(this.dbc, this.xmlMapper, this.row2CasMapper, this.rowMappingArray, this.truncationSize);
        this.initialized = true;
    }

    /**
     * Compares the hash of the CAS document text with the stored hash of the same document and,
     * on equality, sets the unchanged flag and/or adds a {@link ToVisit} annotation, depending on
     * the configuration. Does nothing when neither feature is enabled.
     */
    private void setToVisitAnnotation(JCas jCas) {
        if (!this.addToVisitKeys && !this.addUnchangedDocumentTextFlag) {
            return;
        }
        DBProcessingMetaData dbProcessingMetaData = JCasUtil.selectSingle(jCas, DBProcessingMetaData.class);
        // The key format must match the one built in fetchCurrentHashesFromDatabase.
        String pkString = String.join(",", dbProcessingMetaData.getPrimaryKey().toArray());
        String existingHash = this.docId2HashMap != null ? this.docId2HashMap.get(pkString) : null;
        if (existingHash == null) {
            log.trace("No existing hash was found for document {}", pkString);
            return;
        }
        if (!existingHash.equals(this.getHash(jCas))) {
            return;
        }
        log.trace("Document {} has a document text hash that equals the one present in the database. Creating a ToVisit annotation routing it only to the components with delegate keys {}.", pkString, (Object) this.toVisitKeys);
        if (this.addUnchangedDocumentTextFlag) {
            dbProcessingMetaData.setIsDocumentHashUnchanged(true);
        }
        if (this.addToVisitKeys) {
            ToVisit toVisit = new ToVisit(jCas);
            if (this.toVisitKeys != null && this.toVisitKeys.length != 0) {
                StringArray keysArray = new StringArray(jCas, this.toVisitKeys.length);
                keysArray.copyFromArray(this.toVisitKeys, 0, 0, this.toVisitKeys.length);
                toVisit.setDelegateKeys(keysArray);
            }
            // Without delegate keys the annotation is still indexed.
            toVisit.addToIndexes();
        }
    }

    /**
     * Returns the Base64-encoded SHA-256 digest of the CAS document text.
     */
    private String getHash(JCas newCas) {
        String documentText = newCas.getDocumentText();
        // NOTE(review): getBytes() uses the platform default charset. It is left untouched because
        // the stored hash is produced by another component and both sides must agree on the
        // encoding; consider switching both to UTF-8 together.
        byte[] sha = DigestUtils.sha256(documentText.getBytes());
        return Base64.encodeBase64String(sha);
    }

    /**
     * Delegates CAS population to the {@link CasPopulator}, which is also handed a callback that
     * sets the DB processing meta data.
     *
     * @throws AnalysisEngineProcessException wrapping any {@link CasPopulationException}
     */
    private void populateCas(JCas jCas, byte[][] documentData) throws AnalysisEngineProcessException {
        try {
            this.casPopulator.populateCas(jCas, documentData, (docData, jcas) -> DBReader.setDBProcessingMetaData(this.dbc, this.readDataTable, this.tableName, docData, jcas));
        }
        catch (CasPopulationException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Returns mutable copies of the field definitions of all columns retrieved from the database,
     * as reported by the database connector for the configured schemas.
     *
     * @return one map per retrieved column
     */
    protected List<Map<String, Object>> getAllRetrievedColumns() {
        List<? extends Map<String, ?>> fields = this.dbc.getNumColumnsAndFields(this.tables.length > 1, this.schemaNames).getRight();
        List<Map<String, Object>> columns = new ArrayList<>(fields.size());
        for (Map<String, ?> field : fields) {
            columns.add(new HashMap<>(field));
        }
        return columns;
    }

    /**
     * Loads the stored hashes for all documents of the given batch from the document table.
     *
     * @param rowBatch the batch whose identifiers are looked up
     * @return a map from comma-joined primary key values to the stored hash; empty (never
     *         {@code null}) when hash comparison is disabled, no document table is configured or
     *         the batch carries no identifiers
     * @throws AnalysisEngineProcessException if the database query fails
     */
    private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
        Map<String, String> id2hash = new HashMap<>();
        boolean hashComparisonRequested = this.addToVisitKeys || this.addUnchangedDocumentTextFlag;
        // initialize() accepts the hash features without a document table; in that case there
        // is nothing to query and every document is treated as having no stored hash.
        if (!hashComparisonRequested || this.xmiStorageDataTable == null || rowBatch.getIdentifiers() == null || rowBatch.getIdentifiers().size() == 0) {
            return id2hash;
        }
        String hashColumn = this.documentItemToHash + "_sha256";
        List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
        for (StringArray pkArray : rowBatch.getIdentifiers()) {
            documentIds.add(pkArray.toStringArray());
        }
        String sql = null;
        try (CoStoSysConnection conn = this.dbc.obtainOrReserveConnection()) {
            FieldConfig xmiTableSchema = this.dbc.getFieldConfiguration(this.xmiStorageDataTableSchema);
            int numPkColumns = xmiTableSchema.getPrimaryKey().length;
            // One "col1='v1' AND col2='v2'" group per document, all groups OR-ed together.
            // NOTE(review): expandPKNames presumably substitutes the key column names for the %s
            // placeholders; confirm how id values containing '%' are treated.
            String idQuery = documentIds.stream()
                    .map(key -> Arrays.stream(key).map(part -> "%s='" + escapeSqlLiteral(part) + "'").toArray(String[]::new))
                    .map(keyConditions -> xmiTableSchema.expandPKNames(keyConditions))
                    .map(expandedKeys -> String.join(" AND ", expandedKeys))
                    .collect(Collectors.joining(" OR "));
            sql = String.format("SELECT %s,%s FROM %s WHERE %s", xmiTableSchema.getPrimaryKeyString(), hashColumn, this.xmiStorageDataTable, idQuery);
            try (Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(sql)) {
                while (rs.next()) {
                    String[] pkValues = new String[numPkColumns];
                    for (int i = 0; i < numPkColumns; ++i) {
                        pkValues[i] = rs.getString(i + 1);
                    }
                    // The hash column follows directly after the primary key columns.
                    id2hash.put(String.join(",", pkValues), rs.getString(numPkColumns + 1));
                }
            }
        }
        catch (SQLException e) {
            log.error("Could not retrieve hashes from the database. SQL query was '{}':", sql, e);
            throw new AnalysisEngineProcessException(e);
        }
        return id2hash;
    }

    /**
     * Doubles single quotes so that the value can be embedded in a single-quoted SQL string
     * literal. A {@code null} value is rendered as the text {@code null}, as plain string
     * concatenation would do.
     */
    private static String escapeSqlLiteral(String value) {
        return String.valueOf(value).replace("'", "''");
    }
}

