package uk.ac.shef.dcs.sti.core.algorithm.tmp;

import com.gargoylesoftware.htmlunit.html.DomNode;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javafx.util.Pair;
import org.apache.log4j.Logger;
import uk.ac.shef.dcs.kbsearch.KBSearch;
import uk.ac.shef.dcs.kbsearch.KBSearchException;
import uk.ac.shef.dcs.kbsearch.model.Entity;
import uk.ac.shef.dcs.sti.STIException;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.sampler.TContentCellRanker;
import uk.ac.shef.dcs.sti.core.model.TAnnotation;
import uk.ac.shef.dcs.sti.core.model.TCell;
import uk.ac.shef.dcs.sti.core.model.TCellAnnotation;
import uk.ac.shef.dcs.sti.core.model.TColumnHeaderAnnotation;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.nlp.NLPTools;
import uk.ac.shef.dcs.util.StringUtils;

/* loaded from: input_file:uk/ac/shef/dcs/sti/core/algorithm/tmp/UPDATE.class */
/**
 * Iteratively revises the column (header) and cell annotations of a table.
 *
 * <p>Each round builds a bag-of-words "domain representation" from the first
 * annotation of every cell in the interpreted columns, asks the column
 * classifier to update class scores with it, re-disambiguates the cells and
 * re-classifies the columns, and finally compares the winning annotations with
 * those of the previous round. The loop ends when nothing changed or after
 * {@link #MAX_UPDATE_ITERATIONS} rounds.
 */
public class UPDATE {
    private static final Logger LOG = Logger.getLogger(UPDATE.class.getName());

    /** Upper bound on the number of revise/compare rounds. */
    private static final int MAX_UPDATE_ITERATIONS = 10;

    /** Score-element key looked up on each header annotation after re-classification. */
    private static final String SCORE_DOMAIN_CONSENSUS = "domain_consensus";

    /** Cells whose text is shorter than this are not disambiguated. */
    private static final int MIN_CELL_TEXT_LENGTH = 2;

    private final TCellDisambiguator disambiguator;
    private final KBSearch kbSearch;
    private final TColumnClassifier classifier;
    private final String nlpResourcesDir;
    private final TContentCellRanker selector;
    private final List<String> stopWords;

    /**
     * @param selector        produces the ordered row blocks to process for a column
     * @param kbSearch        knowledge-base search used to fetch entity candidates
     * @param disambiguator   scores candidates and writes cell annotations
     * @param classifier      updates column class scores
     * @param stopWords       words removed from every entity's bag of words
     * @param nlpResourcesDir directory handed to {@link NLPTools#getInstance}
     */
    public UPDATE(TContentCellRanker selector, KBSearch kbSearch, TCellDisambiguator disambiguator, TColumnClassifier classifier, List<String> stopWords, String nlpResourcesDir) {
        this.selector = selector;
        this.kbSearch = kbSearch;
        this.disambiguator = disambiguator;
        this.classifier = classifier;
        this.nlpResourcesDir = nlpResourcesDir;
        this.stopWords = stopWords;
    }

    /**
     * Runs the revise/compare loop on {@code tAnnotation} in place.
     *
     * @param interpretedColumns indexes of the columns to revise
     * @param table              the table being annotated
     * @param tAnnotation        current annotations; modified by this call
     * @throws KBSearchException if a knowledge-base lookup fails
     * @throws STIException      if copying or revising annotations fails
     */
    public void update(List<Integer> interpretedColumns, Table table, TAnnotation tAnnotation) throws KBSearchException, STIException {
        Set<String> seenEntityIds = new HashSet<>();
        TAnnotation previous;
        boolean stable;
        int iteration = 0;
        do {
            LOG.info("\t>> UPDATE begins, iteration:" + iteration);
            seenEntityIds.addAll(collectAllEntityCandidateIds(table, tAnnotation));
            List<String> domainRep = createDomainRep(table, tAnnotation, interpretedColumns);
            this.classifier.updateClazzScoresByDC(tAnnotation, domainRep, interpretedColumns);

            // Snapshot the state before revision so the two rounds can be compared.
            previous = new TAnnotation(tAnnotation.getRows(), tAnnotation.getCols());
            TAnnotation.copy(tAnnotation, previous);

            reviseColumnAndCellAnnotations(seenEntityIds, table, tAnnotation, interpretedColumns);
            LOG.info("\t>> update iteration " + iteration + " complete");
            stable = checkStablization(previous, tAnnotation, table.getNumRows(), interpretedColumns);
            iteration++;
        } while (!stable && iteration < MAX_UPDATE_ITERATIONS);

        // Decide on the stability flag itself: a run that stabilises on the very
        // last permitted iteration is still a success.
        if (stable) {
            LOG.info("\t>> UPDATE STABLIZED AFTER " + iteration + " ITERATIONS");
            return;
        }
        LOG.warn("\t>> UPDATE CANNOT STABILIZE AFTER " + iteration + " ITERATIONS, Stopped");
        // NOTE(review): this copies the previous round into a throw-away object, so
        // the caller's tAnnotation is NOT rolled back. It looks like a rollback was
        // intended; confirm TAnnotation.copy semantics before redirecting the copy
        // to tAnnotation. Kept as-is to preserve existing behaviour.
        TAnnotation.copy(previous, new TAnnotation(previous.getRows(), previous.getCols()));
    }

    /**
     * Collects the ids of every entity attached to any content cell annotation.
     */
    private Set<String> collectAllEntityCandidateIds(Table table, TAnnotation tAnnotation) {
        Set<String> ids = new HashSet<>();
        for (int col = 0; col < table.getNumCols(); col++) {
            for (int row = 0; row < table.getNumRows(); row++) {
                TCellAnnotation[] cellAnnotations = tAnnotation.getContentCellAnnotations(row, col);
                if (cellAnnotations == null) {
                    continue;
                }
                for (TCellAnnotation cellAnnotation : cellAnnotations) {
                    ids.add(cellAnnotation.getAnnotation().getId());
                }
            }
        }
        return ids;
    }

    /**
     * Builds the domain representation: the concatenated bags of words of the
     * first annotation of every annotated cell in the given columns.
     *
     * @param table              the table being annotated
     * @param tAnnotation        current annotations
     * @param interpretedColumns indexes of the columns to read
     * @return the words, in column-then-row order; duplicates are kept
     */
    public List<String> createDomainRep(Table table, TAnnotation tAnnotation, List<Integer> interpretedColumns) {
        List<String> domain = new ArrayList<>();
        for (int column : interpretedColumns) {
            for (int row = 0; row < table.getNumRows(); row++) {
                TCellAnnotation[] cellAnnotations = tAnnotation.getContentCellAnnotations(row, column);
                if (cellAnnotations == null || cellAnnotations.length == 0) {
                    continue;
                }
                try {
                    domain.addAll(createEntityDomainRep(cellAnnotations[0].getAnnotation()));
                } catch (IOException e) {
                    // Best effort: a cell that cannot be processed contributes no words.
                    LOG.warn("\t>> cannot build domain representation for cell (" + row + "," + column + ")", e);
                }
            }
        }
        return domain;
    }

    /**
     * Turns the first sentence of an entity's description into a bag of words
     * with the configured stop words removed.
     *
     * @throws IOException if the NLP tools cannot be obtained
     */
    private Collection<? extends String> createEntityDomainRep(Entity entity) throws IOException {
        String description = entity.getDescription();
        // A missing description is treated like one with no sentences.
        String[] sentences = description == null
                ? new String[0]
                : NLPTools.getInstance(this.nlpResourcesDir).getSentenceSplitter().sentDetect(description);
        String firstSentence = sentences.length > 0 ? sentences[0] : "";

        List<String> words = new ArrayList<>();
        words.addAll(StringUtils.toBagOfWords(firstSentence, true, true, true));
        words.removeAll(this.stopWords);
        return words;
    }

    /**
     * For every interpreted column: re-disambiguates its cells (constrained by
     * the column's winning header annotations), re-classifies the column, adds
     * the domain-consensus score element to each header's final score and
     * stores the re-sorted header annotations.
     */
    private void reviseColumnAndCellAnnotations(Set<String> seenEntityIds, Table table, TAnnotation tAnnotation, List<Integer> interpretedColumns) throws KBSearchException, STIException {
        for (int column : interpretedColumns) {
            LOG.info("\t\t>> for column " + column);
            List<List<Integer>> rowBlocks = this.selector.select(table, column, tAnnotation.getSubjectColumn());

            Set<String> winningClazzIds = new HashSet<>();
            for (TColumnHeaderAnnotation winner : tAnnotation.getWinningHeaderAnnotations(column)) {
                winningClazzIds.add(winner.getAnnotation().getId());
            }

            List<Integer> updatedRows = new ArrayList<>();
            for (List<Integer> rowBlock : rowBlocks) {
                // The first row of a block supplies the cell text used for the lookup.
                TCell cell = table.getContentCell(rowBlock.get(0).intValue(), column);
                if (cell.getText().length() < MIN_CELL_TEXT_LENGTH) {
                    LOG.info("\t\t>>> short text cell skipped: " + rowBlock + "," + column + " " + cell.getText());
                    continue;
                }
                List<Pair<Entity, Map<String, Double>>> scored =
                        disambiguate(seenEntityIds, cell, table, winningClazzIds, rowBlock, column, rowBlocks.size());
                if (!scored.isEmpty()) {
                    this.disambiguator.addCellAnnotation(table, tAnnotation, rowBlock, column, scored);
                    updatedRows.addAll(rowBlock);
                }
            }

            this.classifier.updateColumnClazz(updatedRows, column, tAnnotation, table, true);

            TColumnHeaderAnnotation[] headers = tAnnotation.getHeaderAnnotation(column);
            for (TColumnHeaderAnnotation header : headers) {
                Double domainConsensus = header.getScoreElements().get(SCORE_DOMAIN_CONSENSUS);
                if (domainConsensus != null) {
                    header.setFinalScore(header.getFinalScore() + domainConsensus.doubleValue());
                }
            }
            Arrays.sort(headers);
            tAnnotation.setHeaderAnnotation(column, headers);
        }
    }

    /**
     * Fetches entity candidates for a cell, first restricted to the given
     * class ids and, if that yields nothing, without restriction, then hands
     * them to the disambiguator.
     *
     * @param seenEntityIds   ids seen in earlier rounds; used only for the debug count
     * @param constrainedClazz class ids the first lookup is restricted to
     * @param rowBlock        rows sharing this cell's content
     * @param column          column index of the cell
     * @param totalRowBlocks  number of row blocks in the column
     */
    private List<Pair<Entity, Map<String, Double>>> disambiguate(Set<String> seenEntityIds, TCell cell, Table table, Set<String> constrainedClazz, List<Integer> rowBlock, int column, int totalRowBlocks) throws KBSearchException {
        List<Entity> candidates = this.kbSearch.findEntityCandidatesOfTypes(cell.getText(), constrainedClazz.toArray(new String[0]));
        // Check for "nothing found" BEFORE touching the list, so a null result
        // triggers the unconstrained fallback instead of a NullPointerException.
        if (candidates == null || candidates.isEmpty()) {
            candidates = this.kbSearch.findEntityCandidatesOfTypes(cell.getText(), new String[0]);
        }
        if (candidates == null) {
            candidates = new ArrayList<>();
        }

        int alreadyProcessed = 0;
        for (Entity candidate : candidates) {
            if (seenEntityIds.contains(candidate.getId())) {
                alreadyProcessed++;
            }
        }
        LOG.debug("\t\t>> Rows=" + rowBlock + "/" + totalRowBlocks + " (Total candidates=" + candidates.size() + ", previously already processed=" + alreadyProcessed + ")");
        return this.disambiguator.constrainedDisambiguate(candidates, table, rowBlock, column, totalRowBlocks, false);
    }

    /**
     * Compares the winning annotations of two rounds.
     *
     * @param previous           annotations before the revision
     * @param current            annotations after the revision
     * @param totalRows          number of table rows to compare
     * @param interpretedColumns indexes of the columns to compare
     * @return {@code false} as soon as a difference is found, {@code true} otherwise
     */
    private boolean checkStablization(TAnnotation previous, TAnnotation current, int totalRows, List<Integer> interpretedColumns) {
        // Headers: the two winner lists must have the same size and every current
        // winner must also be a previous winner.
        for (int column : interpretedColumns) {
            List<TColumnHeaderAnnotation> previousWinners = previous.getWinningHeaderAnnotations(column);
            List<TColumnHeaderAnnotation> currentWinners = current.getWinningHeaderAnnotations(column);
            if (currentWinners.size() != previousWinners.size() || !previousWinners.containsAll(currentWinners)) {
                return false;
            }
        }

        // Cells: containsAll is used instead of retainAll so the lists handed out
        // by TAnnotation are never modified.
        for (int column : interpretedColumns) {
            for (int row = 0; row < totalRows; row++) {
                List<TCellAnnotation> previousWinners = previous.getWinningContentCellAnnotation(row, column);
                List<TCellAnnotation> currentWinners = current.getWinningContentCellAnnotation(row, column);
                // NOTE(review): unlike headers, a cell whose number of winners changed
                // is NOT reported as unstable. This looks unintended but is preserved
                // because it affects how many rounds are run — confirm before changing.
                if (currentWinners.size() == previousWinners.size() && !previousWinners.containsAll(currentWinners)) {
                    return false;
                }
            }
        }
        return true;
    }
}
