package uk.ac.shef.dcs.sti.core.subjectcol;

import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.impl.SparseDoubleMatrix2D;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.sampler.TContentRowRanker;
import uk.ac.shef.dcs.sti.core.algorithm.tmp.stopping.StoppingCriteria;
import uk.ac.shef.dcs.sti.core.model.TCell;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.nlp.NLPTools;
import uk.ac.shef.dcs.sti.util.DataTypeClassifier;
import uk.ac.shef.dcs.util.SolrCache;
import uk.ac.shef.dcs.util.StringUtils;
import uk.ac.shef.dcs.websearch.WebSearchException;
import uk.ac.shef.dcs.websearch.WebSearchFactory;
import uk.ac.shef.dcs.websearch.bing.v2.APIKeysDepletedException;

/* loaded from: input_file:uk/ac/shef/dcs/sti/core/subjectcol/TColumnFeatureGenerator.class */
/**
 * Populates {@link TColumnFeature} objects with statistics computed from the cells and headers
 * of a {@link Table}: empty-cell counts, data types, value/token diversity, context-match and
 * web-search scores, header part-of-speech checks and acronym detection.
 *
 * <p>In the decompiled original most instance methods carried a note that their access had been
 * widened from {@code protected}; they are kept {@code public} so existing callers still compile.
 *
 * <p>Several methods read values that other methods of this class write (for example
 * {@link #setIsFirstNEColumn} reads the most frequent data type stored by
 * {@link #setMostFrequentDataTypes}); the required order is stated on each method.
 */
public class TColumnFeatureGenerator {
    /** Matches runs of whitespace; compiled once instead of on every split/replace call. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Cells whose whitespace-collapsed text has this many characters or more are never acronyms. */
    private static final int ACRONYM_LENGTH_LIMIT = 15;

    /** An all-upper-case single token only counts as an acronym below this length. */
    private static final int UPPERCASE_TOKEN_LENGTH_LIMIT = 6;

    private final CMScorer cmScorer;
    private final WSScorer wsScorer;
    private final NLPTools nlpTools;

    /**
     * Builds the scorers and NLP tools used by the feature setters.
     *
     * @param embeddedSolrServer wrapped in a {@link SolrCache} that is handed to the {@link WSScorer}
     * @param str passed both to the {@link CMScorer} constructor and to
     *     {@link NLPTools#getInstance}
     * @param list passed to the {@link WSScorer} constructor
     * @param str2 passed to {@link WebSearchFactory#createInstance}
     * @throws IOException propagated from the collaborators created here
     * @throws WebSearchException propagated from the collaborators created here
     */
    public TColumnFeatureGenerator(EmbeddedSolrServer embeddedSolrServer, String str, List<String> list, String str2) throws IOException, WebSearchException {
        this.cmScorer = new CMScorer(str);
        this.wsScorer = new WSScorer(new SolrCache(embeddedSolrServer), new WebSearchFactory().createInstance(str2), list);
        this.nlpTools = NLPTools.getInstance(str);
    }

    /**
     * Counts, for every feature's column, the cells whose text is {@code null} or empty and
     * stores the count on the feature.
     *
     * @param list features to update; each supplies its column via {@code getColId()}
     * @param table table whose content cells are inspected
     */
    public void setEmptyCellCount(List<TColumnFeature> list, Table table) {
        int numRows = table.getNumRows();
        for (TColumnFeature feature : list) {
            int colId = feature.getColId();
            int emptyCells = 0;
            for (int row = 0; row < numRows; row++) {
                if (cellText(table, row, colId).isEmpty()) {
                    emptyCells++;
                }
            }
            feature.setEmptyCellCount(emptyCells);
        }
    }

    /**
     * Classifies every non-null cell of every column, stores the type on the cell, and stores
     * the resulting per-column type summary on the column header.
     *
     * <p>Per column the summary is:
     * <ul>
     *   <li>a single {@code LONG_TEXT} entry if any cell was classified as {@code LONG_TEXT};</li>
     *   <li>a single {@code ORDERED_NUMBER} entry if the first type after sorting is
     *       {@code NUMBER} and {@link DataTypeClassifier#isOrderedNumber} accepts the numeric
     *       cell values;</li>
     *   <li>otherwise one entry per observed type with the number of cells supporting it.</li>
     * </ul>
     *
     * @param table table whose cells and column headers are updated in place
     */
    public static void setColumnDataTypes(Table table) {
        for (int col = 0; col < table.getNumCols(); col++) {
            Map<DataTypeClassifier.DataType, TColumnDataType> typeCounts = new HashMap<>();
            List<String> numericValues = new ArrayList<>();
            boolean hasLongText = false;

            for (int row = 0; row < table.getNumRows(); row++) {
                TCell cell = table.getContentCell(row, col);
                String text = cell.getText();
                if (text == null) {
                    continue;
                }
                DataTypeClassifier.DataType type = DataTypeClassifier.classify(text);
                cell.setType(type);
                if (type.equals(DataTypeClassifier.DataType.NUMBER)) {
                    numericValues.add(StringUtils.toAlphaNumericWhitechar(text).trim());
                }
                if (type.equals(DataTypeClassifier.DataType.LONG_TEXT)) {
                    hasLongText = true;
                }
                TColumnDataType count = typeCounts.get(type);
                if (count == null) {
                    count = new TColumnDataType(type, 0);
                    typeCounts.put(type, count);
                }
                count.setSupportingRows(count.getSupportingRows() + 1);
            }

            if (hasLongText) {
                typeCounts.clear();
                DataTypeClassifier.DataType longText = DataTypeClassifier.DataType.LONG_TEXT;
                // NOTE(review): the supporting-row count recorded here is the number of NUMBER
                // cells, not the number of LONG_TEXT cells. Kept as in the original; confirm intent.
                typeCounts.put(longText, new TColumnDataType(longText, numericValues.size()));
            } else if (!numericValues.isEmpty()) {
                // Sort a copy so the first element is whatever TColumnDataType's ordering ranks first.
                List<TColumnDataType> ranked = new ArrayList<>(typeCounts.values());
                Collections.sort(ranked);
                if (ranked.get(0).getType().equals(DataTypeClassifier.DataType.NUMBER)
                        && DataTypeClassifier.isOrderedNumber(numericValues.toArray(new String[0]))) {
                    typeCounts.clear();
                    DataTypeClassifier.DataType ordered = DataTypeClassifier.DataType.ORDERED_NUMBER;
                    typeCounts.put(ordered, new TColumnDataType(ordered, numericValues.size()));
                }
            }
            table.getColumnHeader(col).setType(new ArrayList<TColumnDataType>(typeCounts.values()));
        }
    }

    /**
     * Sorts each column header's type list in place and stores its first element on the feature
     * as the most frequent data type.
     *
     * <p>Note the side effect: the list returned by the header's {@code getTypes()} is itself
     * sorted. The list must be non-empty, otherwise {@code get(0)} throws
     * {@link IndexOutOfBoundsException}; {@link #setColumnDataTypes} leaves it empty for a
     * column in which every cell text is {@code null}.
     *
     * @param list features to update
     * @param table table whose column headers hold the type lists
     */
    public void setMostFrequentDataTypes(List<TColumnFeature> list, Table table) {
        for (TColumnFeature feature : list) {
            List<TColumnDataType> types = table.getColumnHeader(feature.getColId()).getTypes();
            Collections.sort(types);
            feature.setMostFrequentDataType(types.get(0));
        }
    }

    /**
     * Flags the first feature (in list order) whose most frequent data type is
     * {@code NAMED_ENTITY}. Requires the most frequent data type to be set on every feature
     * visited.
     *
     * @param list features to scan; at most one is modified
     */
    public void setIsFirstNEColumn(List<TColumnFeature> list) {
        for (TColumnFeature feature : list) {
            if (mostFrequentType(feature).equals(DataTypeClassifier.DataType.NAMED_ENTITY)) {
                feature.setFirstNEColumn(true);
                return;
            }
        }
    }

    /**
     * Flags the feature whose most frequent data type is {@code NAMED_ENTITY} if exactly one
     * such feature exists.
     *
     * @param list features to scan
     * @return the column id of the flagged feature, or {@code -1} if there is not exactly one
     */
    public int setOnlyNEColumn(List<TColumnFeature> list) {
        int matches = 0;
        int lastMatch = -1;
        for (int idx = 0; idx < list.size(); idx++) {
            if (mostFrequentType(list.get(idx)).equals(DataTypeClassifier.DataType.NAMED_ENTITY)) {
                matches++;
                lastMatch = idx;
            }
        }
        if (matches != 1) {
            return -1;
        }
        TColumnFeature only = list.get(lastMatch);
        only.setOnlyNEColumn(true);
        return only.getColId();
    }

    /**
     * Flags the feature with an empty-cell count of zero if exactly one such feature exists and
     * it is not marked as an acronym column.
     *
     * @param list features to scan; empty-cell counts and acronym flags must already be set
     * @return the column id of the flagged feature, or {@code -1} if none qualifies
     */
    public int setOnlyNonEmptyNEColumn(List<TColumnFeature> list) {
        int candidate = -1;
        for (int idx = 0; idx < list.size(); idx++) {
            if (list.get(idx).getEmptyCellCount() != 0) {
                continue;
            }
            if (candidate != -1) {
                // A second column without empty cells: no unique candidate.
                return -1;
            }
            candidate = idx;
        }
        if (candidate == -1) {
            return -1;
        }
        TColumnFeature feature = list.get(candidate);
        if (feature.isAcronymColumn()) {
            return -1;
        }
        feature.setIsOnlyNonEmptyNEColumn(true);
        return feature.getColId();
    }

    /**
     * Flags the feature that is the only one satisfying all of: unique-cell ratio equal to
     * {@code 1.0}, not an acronym column, and a most frequent data type supported by every row
     * of the table.
     *
     * @param list features to scan; unique-cell ratio, acronym flag and most frequent data type
     *     must already be set
     * @param table supplies the row count the supporting-row count is compared against
     * @return the column id of the flagged feature, or {@code -1} if there is not exactly one
     */
    public int setOnlyNonDuplicateNEColumn(List<TColumnFeature> list, Table table) {
        int candidate = -1;
        for (int idx = 0; idx < list.size(); idx++) {
            TColumnFeature feature = list.get(idx);
            boolean qualifies = feature.getUniqueCellCount() == 1.0d
                    && !feature.isAcronymColumn()
                    && feature.getMostFrequentDataType().getSupportingRows() == table.getNumRows();
            if (!qualifies) {
                continue;
            }
            if (candidate != -1) {
                return -1;
            }
            candidate = idx;
        }
        if (candidate == -1) {
            return -1;
        }
        // No further acronym check is needed: the loop above already excludes acronym columns.
        TColumnFeature feature = list.get(candidate);
        feature.setIsOnlyNonDuplicateNEColumn(true);
        return feature.getColId();
    }

    /**
     * Stores two diversity ratios on every feature:
     * <ul>
     *   <li>unique cell texts divided by the number of rows;</li>
     *   <li>unique whitespace-separated tokens divided by the total number of tokens.</li>
     * </ul>
     *
     * <p>Both ratios are computed in floating point. A {@code null} cell text is treated as the
     * empty string, and a ratio whose denominator is zero is stored as {@code 0.0}.
     *
     * @param list features to update
     * @param table table whose content cells are inspected
     */
    public void setUniqueValueCount(List<TColumnFeature> list, Table table) {
        int numRows = table.getNumRows();
        for (TColumnFeature feature : list) {
            int colId = feature.getColId();
            Set<String> uniqueCells = new HashSet<>();
            Set<String> uniqueTokens = new HashSet<>();
            int totalTokens = 0;
            for (int row = 0; row < numRows; row++) {
                String text = cellText(table, row, colId);
                uniqueCells.add(text);
                for (String token : WHITESPACE.split(text)) {
                    uniqueTokens.add(token.trim());
                    totalTokens++;
                }
            }
            double cellRatio = numRows == 0 ? 0.0d : (double) uniqueCells.size() / numRows;
            // (unique / rows) / (total / rows) reduces to unique / total.
            double tokenRatio = totalTokens == 0 ? 0.0d : (double) uniqueTokens.size() / totalTokens;
            feature.setUniqueCellCount(cellRatio);
            feature.setUniqueTokenCount(tokenRatio);
        }
    }

    /**
     * Asks the {@link CMScorer} for a score per column and stores it on each feature as the
     * context-match score; columns without a score get {@code 0.0}.
     *
     * @param list features to update
     * @param table table passed to the scorer
     */
    public void setCMScores(List<TColumnFeature> list, Table table) {
        int[] colIds = new int[list.size()];
        for (int idx = 0; idx < colIds.length; idx++) {
            colIds[idx] = list.get(idx).getColId();
        }
        Map<Integer, Double> scores = this.cmScorer.score(table, colIds);
        for (TColumnFeature feature : list) {
            Double score = scores.get(Integer.valueOf(feature.getColId()));
            feature.setContextMatchScore(score == null ? 0.0d : score.doubleValue());
        }
    }

    /**
     * Scores every row of the table with the {@link WSScorer}.
     *
     * <p>Only columns whose most frequent data type is {@code NAMED_ENTITY} or
     * {@code SHORT_TEXT} are scored; cells whose normalized text is {@code null} or empty are
     * skipped.
     *
     * @param list features identifying the candidate columns; most frequent data types must be set
     * @param table table whose cells are scored
     * @return a rows-by-columns matrix holding the score of each scored cell at
     *     {@code (row, colId)}
     * @throws APIKeysDepletedException declared by the original; presumably raised by the scorer
     * @throws IOException declared by the original; presumably raised by the scorer
     */
    public DoubleMatrix2D setWSScores(List<TColumnFeature> list, Table table) throws APIKeysDepletedException, IOException {
        DoubleMatrix2D scores = new SparseDoubleMatrix2D(table.getNumRows(), table.getNumCols());
        List<Integer> colIds = textualColumnIds(list);
        for (int row = 0; row < table.getNumRows(); row++) {
            for (Map.Entry<Integer, Double> cell : scoreRow(table, row, colIds).entrySet()) {
                scores.set(row, cell.getKey().intValue(), cell.getValue().doubleValue());
            }
        }
        return scores;
    }

    /**
     * Scores rows in the order chosen by {@code tContentRowRanker} until
     * {@code stoppingCriteria} says to stop, or all selected rows are processed.
     *
     * <p>If {@code i} is greater than the number of rows, every row is scored via
     * {@link #setWSScores(List, Table)} instead. After each row the running per-column score
     * totals (keyed by column id) are handed to the stopping criteria together with the table's
     * row count.
     *
     * @param list features identifying the candidate columns; most frequent data types must be set
     * @param table table whose cells are scored
     * @param tContentRowRanker supplies the row indices to visit, in order
     * @param stoppingCriteria consulted after each row
     * @param i row-count threshold above which the table is scored without sampling
     * @return a rows-by-columns matrix holding the score of each scored cell at
     *     {@code (row, colId)}; rows never visited stay at the matrix default
     * @throws APIKeysDepletedException declared by the original; presumably raised by the scorer
     * @throws IOException declared by the original; presumably raised by the scorer
     */
    // The totals map stays raw because the generic parameterization expected by
    // StoppingCriteria.stop is not visible from this file; a raw map is accepted either way.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public DoubleMatrix2D setWSScores(List<TColumnFeature> list, Table table, TContentRowRanker tContentRowRanker, StoppingCriteria stoppingCriteria, int i) throws APIKeysDepletedException, IOException {
        if (i > table.getNumRows()) {
            return setWSScores(list, table);
        }
        DoubleMatrix2D scores = new SparseDoubleMatrix2D(table.getNumRows(), table.getNumCols());
        Map columnTotals = new HashMap();
        List<Integer> colIds = textualColumnIds(list);
        for (int row : tContentRowRanker.select(table)) {
            for (Map.Entry<Integer, Double> cell : scoreRow(table, row, colIds).entrySet()) {
                Integer colId = cell.getKey();
                double score = cell.getValue().doubleValue();
                scores.set(row, colId.intValue(), score);
                Double previous = (Double) columnTotals.get(colId);
                double total = (previous == null ? 0.0d : previous.doubleValue()) + score;
                columnTotals.put(colId, Double.valueOf(total));
            }
            if (stoppingCriteria.stop(columnTotals, table.getNumRows())) {
                break;
            }
        }
        return scores;
    }

    /**
     * Marks a feature as having an invalid header part of speech when the tag of the header's
     * last whitespace-separated token (after lower-casing) is {@code "IN"} or {@code "TO"}.
     *
     * <p>Headers whose text is {@code null}, or for which the tagger returns no tags, are left
     * untouched.
     *
     * @param list features to update
     * @param table table whose column headers are inspected
     */
    public void setInvalidHeaderTextSyntax(List<TColumnFeature> list, Table table) {
        for (TColumnFeature feature : list) {
            String headerText = table.getColumnHeader(feature.getColId()).getHeaderText();
            if (headerText == null) {
                continue;
            }
            String[] tags = this.nlpTools.getPosTagger().tag(WHITESPACE.split(headerText.toLowerCase()));
            if (tags.length == 0) {
                // An all-whitespace header splits into zero tokens.
                continue;
            }
            String lastTag = tags[tags.length - 1];
            if ("IN".equals(lastTag) || "TO".equals(lastTag)) {
                feature.setInvalidPOS(true);
            }
        }
    }

    /**
     * Marks a feature as an acronym column when its acronym-like cells outnumber its remaining
     * non-empty cells, i.e. when
     * {@code acronyms > (rows - emptyCellCount) - acronyms}.
     *
     * <p>A {@code null} cell text is treated as empty. Requires the empty-cell count to be set
     * on each feature.
     *
     * @param list features to update
     * @param table table whose content cells are inspected
     */
    public void setAcronymColumnBoolean(List<TColumnFeature> list, Table table) {
        int numRows = table.getNumRows();
        for (TColumnFeature feature : list) {
            int colId = feature.getColId();
            int acronymCells = 0;
            for (int row = 0; row < numRows; row++) {
                if (looksLikeAcronym(cellText(table, row, colId))) {
                    acronymCells++;
                }
            }
            int otherNonEmptyCells = (numRows - feature.getEmptyCellCount()) - acronymCells;
            if (acronymCells > otherNonEmptyCells) {
                feature.setAcronymColumn(true);
            }
        }
    }

    /** Returns the text of a content cell, substituting the empty string for {@code null}. */
    private static String cellText(Table table, int row, int col) {
        String text = table.getContentCell(row, col).getText();
        return text == null ? "" : text;
    }

    /** Returns the data type recorded as most frequent on the given feature. */
    private static DataTypeClassifier.DataType mostFrequentType(TColumnFeature feature) {
        return feature.getMostFrequentDataType().getType();
    }

    /** Collects the column ids of features whose most frequent type is NAMED_ENTITY or SHORT_TEXT. */
    private static List<Integer> textualColumnIds(List<TColumnFeature> features) {
        List<Integer> colIds = new ArrayList<>();
        for (TColumnFeature feature : features) {
            DataTypeClassifier.DataType type = mostFrequentType(feature);
            if (type.equals(DataTypeClassifier.DataType.NAMED_ENTITY)
                    || type.equals(DataTypeClassifier.DataType.SHORT_TEXT)) {
                colIds.add(Integer.valueOf(feature.getColId()));
            }
        }
        return colIds;
    }

    /**
     * Normalizes the given columns' cells of one row, scores them in a single scorer call, and
     * returns the score per column id. Cells whose normalized text is null or empty are omitted;
     * cells the scorer returns no score for map to 0.0.
     */
    private Map<Integer, Double> scoreRow(Table table, int row, List<Integer> colIds) throws APIKeysDepletedException, IOException {
        String[] normalized = new String[colIds.size()];
        for (int k = 0; k < normalized.length; k++) {
            int colId = colIds.get(k).intValue();
            normalized[k] = this.wsScorer.normalize(table.getContentCell(row, colId).getText());
        }
        Map<String, Double> scoresByText = this.wsScorer.score(normalized);
        Map<Integer, Double> scoresByColumn = new HashMap<>();
        for (int k = 0; k < normalized.length; k++) {
            String text = normalized[k];
            if (text == null || text.isEmpty()) {
                continue;
            }
            Double score = scoresByText.get(text);
            scoresByColumn.put(colIds.get(k), Double.valueOf(score == null ? 0.0d : score.doubleValue()));
        }
        return scoresByColumn;
    }

    /**
     * Decides whether a cell text counts as an acronym. After collapsing whitespace runs and
     * trimming, the text must be non-empty and shorter than {@link #ACRONYM_LENGTH_LIMIT}, and
     * match one of: no whitespace with both letters and digits; no whitespace, only upper-case
     * letters among its letters, and shorter than {@link #UPPERCASE_TOKEN_LENGTH_LIMIT}; or
     * exactly one whitespace character and no lower-case letter.
     */
    private static boolean looksLikeAcronym(String text) {
        String collapsed = WHITESPACE.matcher(text).replaceAll(" ").trim();
        if (collapsed.isEmpty() || collapsed.length() >= ACRONYM_LENGTH_LIMIT) {
            return false;
        }
        int spaces = 0;
        int letters = 0;
        int digits = 0;
        boolean allLettersUpper = true;
        for (int pos = 0; pos < collapsed.length(); pos++) {
            char c = collapsed.charAt(pos);
            if (Character.isWhitespace(c)) {
                spaces++;
            } else if (Character.isLetter(c)) {
                letters++;
                if (!Character.isUpperCase(c)) {
                    allLettersUpper = false;
                }
            } else if (Character.isDigit(c)) {
                digits++;
            }
        }
        if (spaces == 0 && digits > 0 && letters > 0) {
            return true;
        }
        if (spaces == 0 && letters > 0 && allLettersUpper && collapsed.length() < UPPERCASE_TOKEN_LENGTH_LIMIT) {
            return true;
        }
        return spaces == 1 && allLettersUpper;
    }
}
