package uk.ac.shef.dcs.sti.TODO.gs;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.xml.serializer.SerializerConstants;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import uk.ac.shef.dcs.kbsearch.freebase.FreebaseQueryProxy;
import uk.ac.shef.dcs.kbsearch.model.Entity;
import uk.ac.shef.dcs.sti.core.model.TAnnotation;
import uk.ac.shef.dcs.sti.core.model.TCellAnnotation;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.io.TAnnotationWriter;
import uk.ac.shef.dcs.sti.parser.table.TableParserLimayeDataset;
import uk.ac.shef.dcs.sti.util.FileUtils;
import uk.ac.shef.dcs.sti.util.TripleGenerator;

/* loaded from: input_file:uk/ac/shef/dcs/sti/TODO/gs/GSBuilder_Limaye.class */
public class GSBuilder_Limaye {
    private FreebaseQueryProxy queryHelper;

    public GSBuilder_Limaye(FreebaseQueryProxy freebaseQueryProxy) {
        this.queryHelper = freebaseQueryProxy;
    }

    /* JADX WARN: Multi-variable type inference failed */
    public static void main(String[] strArr) throws IOException, SAXException, ParserConfigurationException {
        find_missed_files_by_folder("E:\\Data\\table_annotation\\limaye\\all_tables_freebase_groundtruth", "E:\\Data\\table_annotation\\limaye\\all_tables_groundtruth_xml_only", "E:\\Data\\table_annotation\\limaye/gs_limaye_empty.missed");
        System.exit(0);
        TAnnotationWriter tAnnotationWriter = new TAnnotationWriter(new TripleGenerator("http://www.freebase.com", "http://dcs.shef.ac.uk"));
        String str = strArr[0];
        String str2 = strArr[1];
        String str3 = strArr[2];
        int intValue = new Integer(strArr[4]).intValue();
        List arrayList = new ArrayList();
        if (strArr.length == 6) {
            arrayList = FileUtils.readList(strArr[5], true);
        }
        GSBuilder_Limaye gSBuilder_Limaye = new GSBuilder_Limaye(null);
        int i = 0;
        File[] listFiles = new File(str2).listFiles();
        ArrayList<File> arrayList2 = new ArrayList(Arrays.asList(listFiles));
        System.out.println(listFiles.length);
        for (File file : arrayList2) {
            try {
                File file2 = new File(str + "/" + file.getName());
                if (file2.exists()) {
                    if (arrayList.size() > 0) {
                        boolean z = false;
                        Iterator it = arrayList.iterator();
                        while (true) {
                            if (!it.hasNext()) {
                                break;
                            }
                            if (file.toString().replaceAll("\\\\", "/").toLowerCase().endsWith((String) it.next())) {
                                z = true;
                                break;
                            }
                        }
                        if (!z) {
                        }
                    }
                    i++;
                    if (intValue <= i) {
                        String file3 = file.toString();
                        System.out.println(i + "_" + file3 + " " + new Date());
                        Table table = new TableParserLimayeDataset().extract(file2.toString(), null).get(0);
                        gSBuilder_Limaye.save(table, gSBuilder_Limaye.readTableAnnotation(file3, table), str3, tAnnotationWriter);
                    }
                } else {
                    System.out.println("no gs for: " + file);
                }
            } catch (Exception e) {
                System.err.println("ERROR:" + file);
                e.printStackTrace();
            }
        }
    }

    public void save(Table table, TAnnotation tAnnotation, String str, TAnnotationWriter tAnnotationWriter) throws FileNotFoundException {
        String replaceAll = table.getSourceId().replaceAll("\\\\", "/");
        int lastIndexOf = replaceAll.lastIndexOf("/");
        if (lastIndexOf != -1) {
            replaceAll = replaceAll.substring(lastIndexOf + 1).trim();
        }
        tAnnotationWriter.writeHTML(table, tAnnotation, str + File.separator + replaceAll);
        PrintWriter printWriter = new PrintWriter(str + File.separator + replaceAll + ".keys");
        for (int i = 0; i < table.getNumRows(); i++) {
            for (int i2 = 0; i2 < table.getNumCols(); i2++) {
                TCellAnnotation[] contentCellAnnotations = tAnnotation.getContentCellAnnotations(i, i2);
                if (contentCellAnnotations != null && contentCellAnnotations.length > 0) {
                    printWriter.println(i + StringUtils.COMMA_STR + i2 + StringUtils.COMMA_STR + contentCellAnnotations[0].getAnnotation().getId());
                }
            }
        }
        printWriter.close();
    }

    public TAnnotation readTableAnnotation(String str, Table table) throws IOException, ParserConfigurationException, SAXException {
        List<Node> findAll = DomUtils.findAll(DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(str), "//cellAnnotatoons/row");
        int i = 0;
        for (int i2 = 0; i2 < findAll.size(); i2++) {
            List<Node> findAll2 = DomUtils.findAll(findAll.get(i2), "entity");
            int i3 = 0;
            for (int i4 = 0; i4 < findAll2.size(); i4++) {
                Node node = findAll2.get(i4);
                if (node.getTextContent() != null && node.getTextContent().length() != 0) {
                    i3++;
                }
            }
            if (i3 > i) {
                i = i3;
            }
        }
        TAnnotation tAnnotation = new TAnnotation(table.getNumRows(), table.getNumCols());
        for (int i5 = 0; i5 < findAll.size(); i5++) {
            List<Node> findAll3 = DomUtils.findAll(findAll.get(i5), "entity");
            for (int i6 = 0; i6 < findAll3.size(); i6++) {
                Node node2 = findAll3.get(i6);
                if (node2.getTextContent() != null && node2.getTextContent().length() != 0) {
                    try {
                        TCellAnnotation[] createCellAnnotation = createCellAnnotation(node2.getTextContent().trim());
                        System.out.println("\t row=" + i5 + ",col=" + i6);
                        if (createCellAnnotation != null) {
                            tAnnotation.setContentCellAnnotations(i5, i6, createCellAnnotation);
                        } else {
                            System.out.println();
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
        }
        return tAnnotation;
    }

    public TCellAnnotation[] createCellAnnotation(String str) throws IOException {
        List<String> mqlapi_topic_mids_with_wikipedia_pageid = this.queryHelper.mqlapi_topic_mids_with_wikipedia_pageid(queryWikipediaPageId(str));
        if (mqlapi_topic_mids_with_wikipedia_pageid == null || mqlapi_topic_mids_with_wikipedia_pageid.size() == 0) {
            return null;
        }
        return new TCellAnnotation[]{new TCellAnnotation(str, new Entity(mqlapi_topic_mids_with_wikipedia_pageid.get(0), str), 1.0d, new HashMap())};
    }

    private static String queryWikipediaPageId(String str) throws IOException {
        String substring;
        int indexOf;
        String substring2;
        int indexOf2;
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new URL("https://en.wikipedia.org/w/api.php?action=query&titles=" + str).openConnection().getInputStream()));
        StringBuilder sb = new StringBuilder();
        while (true) {
            String readLine = bufferedReader.readLine();
            if (readLine == null) {
                break;
            }
            sb.append(readLine);
        }
        bufferedReader.close();
        String sb2 = sb.toString();
        int indexOf3 = sb2.indexOf("page pageid=");
        if (indexOf3 == -1 || (indexOf = (substring = sb2.substring(indexOf3)).indexOf(SerializerConstants.ENTITY_QUOT)) == -1 || (indexOf2 = (substring2 = substring.substring(indexOf + 6)).indexOf(SerializerConstants.ENTITY_QUOT)) == -1) {
            return null;
        }
        try {
            return String.valueOf(Long.valueOf(substring2.substring(0, indexOf2).trim()));
        } catch (Exception e) {
            return null;
        }
    }

    public static void find_missed_files(String str, String str2) throws IOException {
        PrintWriter printWriter = new PrintWriter(str2);
        for (String str3 : FileUtils.readList(str, false)) {
            if (str3.startsWith("ERROR:")) {
                printWriter.println(str3.substring(str3.indexOf(":") + 1).trim());
            }
        }
        printWriter.close();
    }

    public static void find_missed_files_by_folder(String str, String str2, String str3) throws IOException {
        PrintWriter printWriter = new PrintWriter(str3);
        ArrayList arrayList = new ArrayList();
        for (File file : new File(str).listFiles()) {
            arrayList.add(file.getName());
        }
        for (File file2 : new File(str2).listFiles()) {
            if (!arrayList.contains(file2.getName() + ".cell.keys")) {
                printWriter.println(file2.getName());
            }
        }
        printWriter.close();
    }
}
