package uk.ac.shef.dcs.sti.core.algorithm.ji;

import cc.mallet.grmm.inference.Inferencer;
import cc.mallet.grmm.inference.LoopyBP;
import cc.mallet.grmm.types.AssignmentIterator;
import cc.mallet.grmm.types.Factor;
import cc.mallet.grmm.types.FactorGraph;
import cc.mallet.grmm.types.Variable;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.log4j.Logger;
import uk.ac.shef.dcs.kbsearch.KBSearchException;
import uk.ac.shef.dcs.sti.STIException;
import uk.ac.shef.dcs.sti.core.algorithm.SemanticTableInterpreter;
import uk.ac.shef.dcs.sti.core.algorithm.ji.factorgraph.FactorGraphBuilder;
import uk.ac.shef.dcs.sti.core.model.RelationColumns;
import uk.ac.shef.dcs.sti.core.model.TAnnotation;
import uk.ac.shef.dcs.sti.core.model.TCellAnnotation;
import uk.ac.shef.dcs.sti.core.model.TColumnColumnRelationAnnotation;
import uk.ac.shef.dcs.sti.core.model.TColumnHeaderAnnotation;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.core.subjectcol.SubjectColumnDetector;
import uk.ac.shef.dcs.sti.util.DataTypeClassifier;

/* loaded from: input_file:uk/ac/shef/dcs/sti/core/algorithm/ji/JIInterpreter.class */
/**
 * Table interpreter that annotates cells, column headers and (optionally) column-column relations
 * jointly: candidates are generated for each of them, a factor graph is built over the candidates,
 * loopy belief propagation is run on every connected sub-graph, and the resulting marginals are
 * written back as the final scores of the candidates.
 */
public class JIInterpreter extends SemanticTableInterpreter {
    private static final Logger LOG = Logger.getLogger(JIInterpreter.class.getName());

    /** Detector whose first result is stored as the subject column of the table. */
    protected SubjectColumnDetector subjectColumnDetector;
    /** Iteration cap handed to {@link LoopyBP}; values {@code <= 0} select its no-argument constructor. */
    protected int maxIteration;
    /** Cell values that do not count as content when deciding whether a column should be ignored. */
    protected List<String> invalidCellValues = Arrays.asList("yes", "no", "away", "home");
    /** Forwarded to the relation generator when relation candidates are produced. */
    protected boolean useSubjectColumn;
    /** When set, graphs and affinities are dumped and a failed inference terminates the JVM. */
    protected boolean debugMode;
    protected CandidateEntityGenerator neGenerator;
    protected CandidateConceptGenerator columnClazzClassifier;
    protected CandidateRelationGenerator relationGenerator;
    private final FactorGraphBuilder graphBuilder;

    /**
     * Creates an interpreter.
     *
     * @param subjectColumnDetector detector used to pick the subject column
     * @param candidateEntityGenerator generator of cell (entity) candidates
     * @param candidateConceptGenerator generator of column header (class) candidates
     * @param candidateRelationGenerator generator of column-column relation candidates
     * @param z whether the relation generator should use the subject column
     * @param iArr first array passed to the superclass constructor (presumably the columns to
     *     ignore - confirm against {@code SemanticTableInterpreter})
     * @param iArr2 second array passed to the superclass constructor (presumably the compulsory
     *     columns - confirm against {@code SemanticTableInterpreter})
     * @param i maximum number of belief propagation iterations; {@code <= 0} uses the LoopyBP default
     * @param z2 whether debug mode is enabled
     */
    public JIInterpreter(SubjectColumnDetector subjectColumnDetector, CandidateEntityGenerator candidateEntityGenerator, CandidateConceptGenerator candidateConceptGenerator, CandidateRelationGenerator candidateRelationGenerator, boolean z, int[] iArr, int[] iArr2, int i, boolean z2) {
        super(iArr, iArr2);
        this.useSubjectColumn = z;
        this.subjectColumnDetector = subjectColumnDetector;
        this.graphBuilder = new FactorGraphBuilder();
        this.neGenerator = candidateEntityGenerator;
        this.columnClazzClassifier = candidateConceptGenerator;
        this.relationGenerator = candidateRelationGenerator;
        this.maxIteration = i;
        this.debugMode = z2;
    }

    /**
     * Interprets a table.
     *
     * @param table the table to annotate
     * @param z whether column-column relation candidates are generated and included in the graph
     * @return the annotations; when no entity candidates were generated or nothing was annotated,
     *     the annotations are returned without running inference
     * @throws STIException if the marginals of a sub-graph are invalid (NaN), or wrapping any other
     *     exception raised while interpreting the table
     */
    @Override // uk.ac.shef.dcs.sti.core.algorithm.SemanticTableInterpreter
    public TAnnotation start(Table table, boolean z) throws STIException {
        TAnnotationJI tableAnnotations = new TAnnotationJI(table.getNumRows(), table.getNumCols());
        try {
            Set<Integer> ignoredColumns = collectIgnoreColumns(table);
            int[] ignoredColumnIndexes = new int[ignoredColumns.size()];
            int next = 0;
            for (Integer column : ignoredColumns) {
                ignoredColumnIndexes[next] = column.intValue();
                next++;
            }

            LOG.info(">\t COLUMN FEATURE GENERATION AND SUBJECT COLUMN DETECTION (if enabled)...");
            tableAnnotations.setSubjectColumn(((Integer) this.subjectColumnDetector.compute(table, ignoredColumnIndexes).get(0).getKey()).intValue());

            LOG.info(">\t JOINT INFERENCE VARIABLE INIT");
            LOG.info(">\t named entity generator...");
            boolean entitiesGenerated = generateEntityCandidates(table, tableAnnotations, ignoredColumns);
            LOG.info(">\t column class generator");
            generateClazzCandidates(tableAnnotations, table, ignoredColumns);
            if (z) {
                LOG.info(">\t column column relation generator");
                generateRelationCandidates(tableAnnotations, table, this.useSubjectColumn, ignoredColumns);
            }

            if (!entitiesGenerated || !hasAnnotation(tableAnnotations)) {
                LOG.warn("EMPTY TABLE:" + table.getSourceId());
                return tableAnnotations;
            }

            LOG.info(">\t BUILDING FACTOR GRAPH");
            List<FactorGraph> subGraphs = this.graphBuilder.build(tableAnnotations, z, table.getSourceId());
            LOG.info(">\t\t " + subGraphs.size() + " maximum connected sub-graphs");
            for (int g = 0; g < subGraphs.size(); g++) {
                FactorGraph subGraph = subGraphs.get(g);
                if (this.debugMode) {
                    DebuggingUtil.debugGraph(subGraph, g + "th_graph," + table.getSourceId());
                    tableAnnotations.debugAffinity(g + "th_graph," + table.getSourceId());
                }
                LOG.info(">\t RUNNING INFERENCE");
                Inferencer inferencer = this.maxIteration > 0 ? new LoopyBP(this.maxIteration) : new LoopyBP();
                try {
                    inferencer.computeMarginals(subGraph);
                } catch (IndexOutOfBoundsException e) {
                    if (this.debugMode) {
                        LOG.error("\t Graph empty exception, but checking did not catch this. System exists:" + table.getSourceId());
                        LOG.error(subGraph.dumpToString());
                        for (Object line : DebuggingUtil.debugAnnotations(tableAnnotations)) {
                            LOG.info(line.toString());
                        }
                        LOG.warn(ExceptionUtils.getFullStackTrace(e));
                        // Debug mode deliberately stops the whole process on this failure.
                        System.exit(1);
                    } else {
                        LOG.warn(ExceptionUtils.getFullStackTrace(e));
                    }
                }
                LOG.info(">\t COLLECTING MARGINAL PROB AND FINALIZING ANNOTATIONS");
                if (!createAnnotations(subGraph, this.graphBuilder, inferencer, tableAnnotations)) {
                    throw new STIException("Invalid marginals, failed: " + table.getSourceId());
                }
            }
            return tableAnnotations;
        } catch (STIException e) {
            // Already the declared type: rethrow as-is instead of wrapping it a second time.
            throw e;
        } catch (Exception e) {
            throw new STIException(e);
        }
    }

    /**
     * Tells whether any header annotation, cell annotation or column-column relation exists.
     */
    private boolean hasAnnotation(TAnnotationJI tAnnotationJI) {
        for (int col = 0; col < tAnnotationJI.getCols(); col++) {
            if (tAnnotationJI.getHeaderAnnotation(col).length > 0) {
                return true;
            }
            for (int row = 0; row < tAnnotationJI.getRows(); row++) {
                if (tAnnotationJI.getContentCellAnnotations(row, col).length > 0) {
                    return true;
                }
            }
        }
        return !tAnnotationJI.getColumncolumnRelations().isEmpty();
    }

    /**
     * Collects the columns to skip: the configured ignore columns plus, for tables with more than
     * four rows, every column that has fewer than four distinct values (after stripping
     * non-alphanumeric characters and dropping values of length one or less) all of which are
     * listed in {@link #invalidCellValues}.
     *
     * @param table the table to inspect
     * @return a new set holding the indexes of the columns to ignore
     */
    protected Set<Integer> collectIgnoreColumns(Table table) {
        Set<Integer> ignored = new HashSet<Integer>();
        ignored.addAll(getIgnoreColumns());
        for (int col = 0; col < table.getNumCols(); col++) {
            Set<String> distinctValues = new HashSet<String>();
            for (int row = 0; row < table.getNumRows(); row++) {
                String normalized = table.getContentCell(row, col).getText().trim().replaceAll("[^a-zA-Z0-9]", "");
                if (normalized.length() > 1) {
                    distinctValues.add(normalized);
                }
            }
            if (distinctValues.size() < 4 && table.getNumRows() > 4) {
                distinctValues.removeAll(this.invalidCellValues);
                if (distinctValues.isEmpty()) {
                    ignored.add(Integer.valueOf(col));
                }
            }
        }
        return ignored;
    }

    /**
     * Generates initial cell annotations for every row of each compulsory column and of each
     * non-ignored column whose most frequent data type is NAMED_ENTITY.
     *
     * @param table the table being interpreted
     * @param tAnnotation receiver of the generated cell annotations
     * @param collection indexes of the columns to ignore
     * @return {@code true} if at least one column was processed
     * @throws KBSearchException if the entity generator fails
     */
    protected boolean generateEntityCandidates(Table table, TAnnotation tAnnotation, Collection<Integer> collection) throws KBSearchException {
        boolean anyColumnProcessed = false;
        for (int col = 0; col < table.getNumCols(); col++) {
            boolean compulsory = getMustdoColumns().contains(Integer.valueOf(col));
            if (!compulsory && !isSelectableColumn(table, col, collection)) {
                continue;
            }
            LOG.info((compulsory ? "\t\t>> column=(compulsory)" : "\t\t>> column=") + col);
            for (int row = 0; row < table.getNumRows(); row++) {
                this.neGenerator.generateInitialCellAnnotations(tAnnotation, table, row, col);
            }
            anyColumnProcessed = true;
        }
        return anyColumnProcessed;
    }

    /**
     * Generates initial column (class) annotations for each compulsory column and for each
     * non-ignored column whose most frequent data type is NAMED_ENTITY.
     *
     * @param tAnnotationJI receiver of the generated header annotations
     * @param table the table being interpreted
     * @param collection indexes of the columns to ignore
     * @throws KBSearchException if the concept generator fails
     * @throws STIException if the concept generator fails
     */
    protected void generateClazzCandidates(TAnnotationJI tAnnotationJI, Table table, Collection<Integer> collection) throws KBSearchException, STIException {
        for (int col = 0; col < table.getNumCols(); col++) {
            boolean compulsory = getMustdoColumns().contains(Integer.valueOf(col));
            if (!compulsory && !isSelectableColumn(table, col, collection)) {
                continue;
            }
            LOG.info((compulsory ? "\t\t>> column=(compulsory)" : "\t\t>> column=") + col);
            this.columnClazzClassifier.generateInitialColumnAnnotations(tAnnotationJI, table, col);
        }
    }

    /**
     * Tells whether a non-compulsory column qualifies for candidate generation: it must not be
     * ignored and its most frequent data type must be NAMED_ENTITY.
     */
    private boolean isSelectableColumn(Table table, int col, Collection<Integer> ignoredColumns) {
        return !ignoredColumns.contains(Integer.valueOf(col))
                && table.getColumnHeader(col).getFeature().getMostFrequentDataType().getType().equals(DataTypeClassifier.DataType.NAMED_ENTITY);
    }

    /**
     * Delegates the generation of column-column relation candidates to the relation generator.
     *
     * @param tAnnotationJI receiver of the generated relation annotations
     * @param table the table being interpreted
     * @param z whether the subject column should be used
     * @param collection indexes of the columns to ignore
     * @throws IOException if the relation generator fails
     * @throws KBSearchException if the relation generator fails
     */
    protected void generateRelationCandidates(TAnnotationJI tAnnotationJI, Table table, boolean z, Collection<Integer> collection) throws IOException, KBSearchException {
        this.relationGenerator.generateInitialColumnColumnRelations(tAnnotationJI, table, z, collection);
    }

    /**
     * Copies the marginal of every typed variable of the graph onto the matching candidate
     * annotations as their final score. Candidates whose label does not occur in the marginal
     * receive a final score of 0.
     *
     * @param factorGraph graph whose variables are visited
     * @param factorGraphBuilder builder that maps variables back to cells, headers and relations
     * @param inferencer inferencer holding the computed marginals
     * @param tAnnotationJI annotations to update
     * @return {@code false} as soon as a NaN marginal is met (annotations may then be partially
     *     updated), {@code true} otherwise
     */
    protected boolean createAnnotations(FactorGraph factorGraph, FactorGraphBuilder factorGraphBuilder, Inferencer inferencer, TAnnotationJI tAnnotationJI) {
        for (int i = 0; i < factorGraph.numVariables(); i++) {
            Variable variable = factorGraph.get(i);
            Factor marginal = inferencer.lookupMarginal(variable);
            String variableType = factorGraphBuilder.getTypeOfVariable(variable);
            if (variableType == null) {
                continue;
            }
            boolean valid = true;
            if (variableType.equals(VariableType.CELL.toString())) {
                valid = scoreCellCandidates(variable, marginal, factorGraphBuilder, tAnnotationJI);
            } else if (variableType.equals(VariableType.HEADER.toString())) {
                valid = scoreHeaderCandidates(variable, marginal, factorGraphBuilder, tAnnotationJI);
            } else if (variableType.equals(VariableType.RELATION.toString())) {
                valid = scoreRelationCandidates(variable, marginal, factorGraphBuilder, tAnnotationJI);
            }
            if (!valid) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the marginal value of the first assignment of {@code variable} whose label equals
     * {@code label}, or 0 when no assignment carries that label. The result may be NaN.
     */
    private static double marginalScore(Variable variable, Factor marginal, String label) {
        AssignmentIterator assignments = marginal.assignmentIterator();
        while (assignments.hasNext()) {
            String assigned = variable.getLabelAlphabet().lookupLabel(assignments.indexOfCurrentAssn()).toString();
            if (assigned.equals(label)) {
                // Stop at the match: the iterator is not advanced here, so looping on would never end.
                return marginal.value(assignments);
            }
            assignments.next();
        }
        return 0.0d;
    }

    /** Scores and re-sorts the candidates of the cell behind a CELL variable; false on NaN. */
    private boolean scoreCellCandidates(Variable variable, Factor marginal, FactorGraphBuilder factorGraphBuilder, TAnnotationJI tAnnotationJI) {
        int[] position = factorGraphBuilder.getCellPosition(variable);
        if (position == null) {
            return true;
        }
        TCellAnnotation[] candidates = tAnnotationJI.getContentCellAnnotations(position[0], position[1]);
        for (TCellAnnotation candidate : candidates) {
            double score = marginalScore(variable, marginal, candidate.getAnnotation().getId());
            if (Double.isNaN(score)) {
                return false;
            }
            candidate.setFinalScore(score);
        }
        Arrays.sort(candidates);
        tAnnotationJI.setContentCellAnnotations(position[0], position[1], candidates);
        return true;
    }

    /** Scores and re-sorts the candidates of the column behind a HEADER variable; false on NaN. */
    private boolean scoreHeaderCandidates(Variable variable, Factor marginal, FactorGraphBuilder factorGraphBuilder, TAnnotationJI tAnnotationJI) {
        // Assigned without Integer.valueOf so that a null position (if the builder can return one)
        // reaches the check below instead of failing while unboxing.
        Integer column = factorGraphBuilder.getHeaderPosition(variable);
        if (column == null) {
            return true;
        }
        TColumnHeaderAnnotation[] candidates = tAnnotationJI.getHeaderAnnotation(column.intValue());
        for (TColumnHeaderAnnotation candidate : candidates) {
            double score = marginalScore(variable, marginal, candidate.getAnnotation().getId());
            if (Double.isNaN(score)) {
                return false;
            }
            candidate.setFinalScore(score);
        }
        Arrays.sort(candidates);
        tAnnotationJI.setHeaderAnnotation(column.intValue(), candidates);
        return true;
    }

    /**
     * Picks the relation direction of the highest-scoring assignment (the later one on ties),
     * removes the relations registered for the opposite direction and scores the candidates of
     * the chosen direction; false on NaN.
     */
    private boolean scoreRelationCandidates(Variable variable, Factor marginal, FactorGraphBuilder factorGraphBuilder, TAnnotationJI tAnnotationJI) {
        double bestScore = 0.0d;
        RelationColumns direction = null;
        AssignmentIterator assignments = marginal.assignmentIterator();
        while (assignments.hasNext()) {
            double score = marginal.value(assignments);
            if (Double.isNaN(score)) {
                return false;
            }
            String label = variable.getLabelAlphabet().lookupLabel(assignments.indexOfCurrentAssn()).toString();
            if (score >= bestScore) {
                bestScore = score;
                direction = factorGraphBuilder.getRelationDirection(label);
            }
            assignments.next();
        }
        if (direction == null) {
            // No assignment, or the builder could not map the winning label to a direction.
            LOG.warn("No relation direction resolved for a relation variable; skipping it");
            return true;
        }

        List<TColumnColumnRelationAnnotation> candidates = tAnnotationJI.getColumncolumnRelations().get(direction);
        tAnnotationJI.getColumncolumnRelations().remove(new RelationColumns(direction.getObjectCol(), direction.getSubjectCol()));
        if (candidates == null) {
            LOG.warn("No relation candidates registered for the selected direction; skipping it");
            return true;
        }
        for (TColumnColumnRelationAnnotation candidate : candidates) {
            double score = marginalScore(variable, marginal, candidate.getRelationURI());
            if (Double.isNaN(score)) {
                return false;
            }
            candidate.setFinalScore(score);
        }
        tAnnotationJI.getColumncolumnRelations().put(direction, candidates);
        return true;
    }
}
