package uk.ac.shef.dcs.sti.experiment;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.simmetrics.metrics.StringMetrics;
import uk.ac.shef.dcs.kbsearch.KBSearchFactory;
import uk.ac.shef.dcs.sti.STIConstantProperty;
import uk.ac.shef.dcs.sti.STIException;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.LEARNING;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.LEARNINGPreliminaryColumnClassifier;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.LEARNINGPreliminaryDisamb;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.LiteralColumnTaggerImpl;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.TCellDisambiguator;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.TColumnClassifier;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.TColumnColumnRelationEnumerator;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.TMPInterpreter;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.UPDATE;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.sampler.OSPD_nonEmpty;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.sampler.TContentTContentRowRankerImpl;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.scorer.TMPClazzScorer;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.scorer.TMPEntityScorer;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.scorer.TMPRelationScorer;
import uk.ac.shef.dcs.sti.core.feature.FreebaseConceptBoWCreator;
import uk.ac.shef.dcs.sti.core.feature.FreebaseRelationBoWCreator;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.core.scorer.AttributeValueMatcher;
import uk.ac.shef.dcs.sti.core.subjectcol.SubjectColumnDetector;

/* loaded from: input_file:uk/ac/shef/dcs/sti/experiment/TableMinerPlusBatch.class */
/**
 * Batch runner that assembles a {@link TMPInterpreter} from its component stages
 * (KB search, subject column detection, LEARNING, UPDATE, relation learning) and
 * applies it to every file found in an input directory.
 */
public class TableMinerPlusBatch extends STIBatch {
    protected static final String PROPERTY_TMP_IINF_LEARNING_STOPPING_CLASS = "sti.tmp.iinf.learning.stopping.class";
    protected static final String PROPERTY_TMP_IINF_LEARNING_STOPPING_CLASS_CONSTR_PARAM = "sti.tmp.iinf.learning.stopping.class.constructor.params";
    private static final Logger LOG = Logger.getLogger(TableMinerPlusBatch.class.getName());

    /**
     * Creates the batch runner; all work is delegated to the {@code STIBatch} constructor.
     *
     * @param str value forwarded unchanged to {@code STIBatch(String)}
     *            (presumably the path of the batch property file; confirm against STIBatch)
     * @throws IOException  propagated from the superclass constructor
     * @throws STIException propagated from the superclass constructor
     */
    public TableMinerPlusBatch(String str) throws IOException, STIException {
        super(str);
    }

    /**
     * Builds every component needed by the interpreter and assigns {@code kbSearch}
     * and {@code interpreter}.
     *
     * <p>Each stage runs in its own, non-nested try block so that a failure is logged
     * once and reported with the message of the stage that actually failed.
     *
     * @throws STIException if any stage cannot be initialised; the original exception
     *                      is preserved as the cause
     */
    @Override // uk.ac.shef.dcs.sti.experiment.STIBatch
    protected void initComponents() throws STIException {
        LOG.info("Initializing entity cache...");
        EmbeddedSolrServer entityCache = getSolrServerCacheEntity();

        LOG.info("Initializing KBSearch...");
        try {
            this.kbSearch = new KBSearchFactory().createInstance(
                    getAbsolutePath("sti.kbsearch.propertyfile"),
                    entityCache,
                    (EmbeddedSolrServer) null,
                    (EmbeddedSolrServer) null,
                    (EmbeddedSolrServer) null);
        } catch (Exception e) {
            throw initFailure("Failed initialising KBSearch:" + getAbsolutePath("sti.kbsearch.propertyfile"), e);
        }

        LOG.info("Initializing SUBJECT COLUMN DETECTION components ...");
        final SubjectColumnDetector subjectColumnDetector;
        try {
            subjectColumnDetector = new SubjectColumnDetector(
                    new TContentTContentRowRankerImpl(),
                    this.properties.getProperty("sti.iinf.websearch.stopping.class"),
                    StringUtils.split(this.properties.getProperty("sti.iinf.websearch.stopping.class.constructor.params"), ','),
                    getSolrServerCacheWebsearch(),
                    getNLPResourcesDir(),
                    Boolean.valueOf(this.properties.getProperty("sti.subjectcolumndetection.ws")).booleanValue(),
                    getStopwords(),
                    getAbsolutePath("sti.websearch.properties"));
        } catch (Exception e) {
            throw initFailure("Failed initialising SUBJECT COLUMN DETECTION components:"
                    + this.properties.getProperty("sti.websearch.properties"), e);
        }

        LOG.info("Initializing LEARNING components ...");
        final TCellDisambiguator disambiguator;
        final TColumnClassifier classifier;
        final OSPD_nonEmpty sampler;
        final LEARNING learning;
        try {
            disambiguator = new TCellDisambiguator(
                    this.kbSearch,
                    new TMPEntityScorer(getStopwords(), STIConstantProperty.SCORER_ENTITY_CONTEXT_WEIGHT, getNLPResourcesDir()));
            classifier = new TColumnClassifier(
                    new TMPClazzScorer(getNLPResourcesDir(), new FreebaseConceptBoWCreator(), getStopwords(), STIConstantProperty.SCORER_CLAZZ_CONTEXT_WEIGHT));
            sampler = new OSPD_nonEmpty();
            learning = new LEARNING(
                    new LEARNINGPreliminaryColumnClassifier(
                            sampler,
                            this.properties.getProperty(PROPERTY_TMP_IINF_LEARNING_STOPPING_CLASS),
                            StringUtils.split(this.properties.getProperty(PROPERTY_TMP_IINF_LEARNING_STOPPING_CLASS_CONSTR_PARAM), ','),
                            this.kbSearch,
                            disambiguator,
                            classifier),
                    new LEARNINGPreliminaryDisamb(this.kbSearch, disambiguator, classifier));
        } catch (Exception e) {
            throw initFailure("Failed initialising LEARNING components:", e);
        }

        LOG.info("Initializing UPDATE components ...");
        final UPDATE update;
        try {
            update = new UPDATE(sampler, this.kbSearch, disambiguator, classifier, getStopwords(), getNLPResourcesDir());
        } catch (Exception e) {
            throw initFailure("Failed initialising UPDATE components:", e);
        }

        LOG.info("Initializing RELATIONLEARNING components ...");
        final TColumnColumnRelationEnumerator relationEnumerator;
        final LiteralColumnTaggerImpl literalColumnTagger;
        try {
            relationEnumerator = new TColumnColumnRelationEnumerator(
                    new AttributeValueMatcher(0.5d, getStopwords(), StringMetrics.levenshtein()),
                    new TMPRelationScorer(getNLPResourcesDir(), new FreebaseRelationBoWCreator(), getStopwords(), STIConstantProperty.SCORER_RELATION_CONTEXT_WEIGHT));
            literalColumnTagger = new LiteralColumnTaggerImpl(getIgnoreColumns());
        } catch (Exception e) {
            // Fail fast like every other stage instead of silently handing null
            // collaborators to the interpreter.
            throw initFailure("Failed initialising RELATIONLEARNING components:", e);
        }

        try {
            this.interpreter = new TMPInterpreter(
                    subjectColumnDetector,
                    learning,
                    update,
                    relationEnumerator,
                    literalColumnTagger,
                    getIgnoreColumns(),
                    getMustdoColumns());
        } catch (Exception e) {
            throw initFailure("Failed initialising TMPInterpreter:", e);
        }
    }

    /**
     * Logs an initialisation failure once and wraps it for rethrowing.
     *
     * @param message stage-specific description of what failed
     * @param cause   the exception that aborted the stage
     * @return an {@link STIException} carrying {@code message} and {@code cause}
     */
    private static STIException initFailure(String message, Exception cause) {
        LOG.error(message, cause);
        return new STIException(message, cause);
    }

    /**
     * Command-line entry point: processes every file of the input directory in sorted order.
     *
     * <p>Arguments:
     * <ol>
     *   <li>{@code strArr[0]} - directory whose files are listed and processed</li>
     *   <li>{@code strArr[1]} - value passed through to {@code process(...)}
     *       (presumably the output folder; confirm against STIBatch)</li>
     *   <li>{@code strArr[2]} - value passed to the {@link #TableMinerPlusBatch(String)} constructor</li>
     * </ol>
     *
     * <p>Files whose path contains {@code .DS_Store} are skipped and do not consume an index.
     * A file is processed only if the previously-failed list is empty or contains the file's
     * 1-based index, and its 0-based index is not below the configured start index.
     *
     * @throws IllegalArgumentException if fewer than three arguments are supplied
     * @throws IOException              if the input path cannot be listed as a directory,
     *                                  or propagated from the batch runner
     * @throws STIException             propagated from the batch runner
     */
    public static void main(String[] strArr) throws IOException, STIException {
        if (strArr == null || strArr.length < 3) {
            throw new IllegalArgumentException(
                    "Expected 3 arguments: <input directory> <output location> <batch property file>");
        }
        String inputDir = strArr[0];
        String outputArg = strArr[1];

        // listFiles() returns null (not an empty array) when the path is not a directory
        // or cannot be read; check before doing any expensive initialisation.
        File[] inputFiles = new File(inputDir).listFiles();
        if (inputFiles == null) {
            throw new IOException("Input path is not a readable directory: " + inputDir);
        }
        Arrays.sort(inputFiles);

        TableMinerPlusBatch batch = new TableMinerPlusBatch(strArr[2]);
        try {
            LOG.info("Initialization complete. Begin STI. Total input files=" + inputFiles.length + "\n");
            List<Integer> previouslyFailed = batch.loadPreviouslyFailed();
            int startIndex = batch.getStartIndex();
            boolean learnRelations = Boolean.parseBoolean(batch.properties.getProperty("sti.learning.relation"));

            int index = 0;
            for (File file : inputFiles) {
                String path = file.toString();
                if (path.contains(".DS_Store")) {
                    continue;
                }
                index++;
                if (!previouslyFailed.isEmpty() && !previouslyFailed.contains(Integer.valueOf(index))) {
                    continue;
                }
                if (index - 1 < startIndex) {
                    continue;
                }
                try {
                    // Strip one pair of surrounding double quotes, if present.
                    String sourceId = path;
                    if (sourceId.length() >= 2 && sourceId.startsWith("\"") && sourceId.endsWith("\"")) {
                        sourceId = sourceId.substring(1, sourceId.length() - 1).trim();
                    }
                    LOG.info("\n<< " + index + "_" + sourceId);

                    List<Table> tables = batch.loadTable(path, batch.getTableParser());
                    if (tables.isEmpty()) {
                        batch.recordFailure(index, path, path);
                    }
                    for (Table table : tables) {
                        boolean succeeded = batch.process(table, sourceId, batch.getTAnnotationWriter(), outputArg, learnRelations);
                        batch.commitAll();
                        if (!succeeded) {
                            batch.recordFailure(index, sourceId, path);
                        }
                    }
                } catch (Exception e) {
                    // One bad file must not abort the batch: log it, record it, move on.
                    LOG.error("Failed processing file " + index + "_" + path, e);
                    batch.recordFailure(index, path, path);
                }
            }
        } finally {
            // Release resources even if the run is aborted by an unexpected exception.
            batch.closeAll();
        }
        LOG.info(new Date());
    }
}
