package uk.ac.shef.dcs.sti.TODO.gs;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.List;
import org.apache.any23.util.FileUtils;
import uk.ac.shef.dcs.kbsearch.freebase.FreebaseQueryProxy;
import uk.ac.shef.dcs.kbsearch.model.Entity;
import uk.ac.shef.dcs.sti.core.model.TAnnotation;
import uk.ac.shef.dcs.sti.core.model.TCellAnnotation;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.io.TAnnotationWriter;
import uk.ac.shef.dcs.sti.parser.table.TableParserIMDB;
import uk.ac.shef.dcs.sti.parser.table.creator.TableObjCreatorIMDB;
import uk.ac.shef.dcs.sti.parser.table.hodetector.TableHODetectorByHTMLTag;
import uk.ac.shef.dcs.sti.parser.table.normalizer.TableNormalizerDiscardIrregularRows;
import uk.ac.shef.dcs.sti.parser.table.validator.TableValidatorGeneric;
import uk.ac.shef.dcs.sti.util.TripleGenerator;

/* loaded from: input_file:uk/ac/shef/dcs/sti/TODO/gs/GSBuilder_IMDB.class */
/**
 * Command-line tool that parses every file of an input directory with
 * {@link TableParserIMDB}, annotates the first table extracted from each file
 * and, when at least one cell received an annotation, writes the result to an
 * output directory.
 *
 * <p>Files whose processing throws an exception have their path appended to
 * {@code missed.csv} in the current working directory.
 */
public class GSBuilder_IMDB {
    /** File that collects the paths of inputs whose processing threw an exception. */
    private static final String MISSED_LOG = "missed.csv";

    /** Marker that precedes the identifier inside the text of a first-column cell. */
    private static final String NAME_MARKER = "/name/";

    /**
     * Entry point.
     *
     * @param strArr {@code strArr[0]} is the directory whose files are parsed,
     *               {@code strArr[1]} is the directory the output is written to
     * @throws IOException if the input directory cannot be listed
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr == null || strArr.length < 2) {
            throw new IllegalArgumentException("Usage: GSBuilder_IMDB <inputDirectory> <outputDirectory>");
        }
        GSBuilder_IMDB builder = new GSBuilder_IMDB();
        TAnnotationWriter writer = new TAnnotationWriter(new TripleGenerator("http://www.freebase.com", "http://dcs.shef.ac.uk"));
        String inputDir = strArr[0];
        String outputDir = strArr[1];
        TableParserIMDB parser = new TableParserIMDB(new TableNormalizerDiscardIrregularRows(true), new TableHODetectorByHTMLTag(), new TableObjCreatorIMDB(), new TableValidatorGeneric());

        // File.listFiles() returns null (not an empty array) when the path is
        // not a directory or cannot be read; fail with a clear message instead
        // of a NullPointerException.
        File[] inputFiles = new File(inputDir).listFiles();
        if (inputFiles == null) {
            throw new IOException("Cannot list files in input directory: " + inputDir);
        }
        System.out.println(inputFiles.length);

        int processed = 0;
        for (File file : inputFiles) {
            processed++;
            System.out.println(processed);
            String path = file.toString();
            try {
                List<Table> tables = parser.extract(FileUtils.readFileContent(file), path);
                if (tables.isEmpty()) {
                    continue;
                }
                // Only the first extracted table is used.
                Table table = tables.get(0);
                System.out.println(file + ", with rows: " + table.getNumRows());
                // NOTE(review): no query proxy is supplied here, so annotate()
                // throws for any table that has a usable "/name/" cell and the
                // file ends up in missed.csv. A configured FreebaseQueryProxy
                // should be passed instead - its construction is not visible
                // from this file.
                TAnnotation annotations = builder.annotate(table, null);
                if (annotations != null && hasAnnotatedCell(table, annotations)) {
                    builder.save(table, annotations, outputDir, writer);
                }
            } catch (Exception e) {
                // Best effort: report the failure, remember the file, go on
                // with the next one.
                e.printStackTrace();
                recordMissed(path);
            }
        }
    }

    /** Returns whether at least one cell of {@code table} has a non-empty annotation array. */
    private static boolean hasAnnotatedCell(Table table, TAnnotation annotations) {
        for (int row = 0; row < table.getNumRows(); row++) {
            for (int col = 0; col < table.getNumCols(); col++) {
                TCellAnnotation[] cell = annotations.getContentCellAnnotations(row, col);
                if (cell != null && cell.length > 0) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Appends {@code path} to {@code missed.csv}. A failure to write the log is
     * reported on stderr and otherwise ignored so that the remaining input
     * files are still processed.
     */
    private static void recordMissed(String path) {
        try (PrintWriter missed = new PrintWriter(new FileWriter(MISSED_LOG, true))) {
            missed.println(path);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds annotations for {@code table} from the text of its first column.
     *
     * <p>For every row, the text of cell {@code (row, 0)} is searched for
     * {@code "/name/"}. The trimmed text between that marker and the last
     * {@code '/'} is sent to
     * {@link FreebaseQueryProxy#searchapi_getTopicsByNameAndType}; when the
     * query returns at least one result, the first result becomes the single
     * annotation (score {@code 1.0}) of cell {@code (row, 1)}. Rows without the
     * marker, or with nothing between the marker and the last {@code '/'}, are
     * left without annotation.
     *
     * @param table              the table to annotate
     * @param freebaseQueryProxy proxy used to look up the extracted identifier;
     *                           must not be {@code null} if any row needs a lookup
     * @return a new annotation object sized to the table; never {@code null}
     * @throws IOException          declared for the query call
     * @throws NullPointerException if a lookup is needed and
     *                              {@code freebaseQueryProxy} is {@code null}
     */
    public TAnnotation annotate(Table table, FreebaseQueryProxy freebaseQueryProxy) throws IOException {
        TAnnotation tAnnotation = new TAnnotation(table.getNumRows(), table.getNumCols());
        for (int row = 0; row < table.getNumRows(); row++) {
            String text = table.getContentCell(row, 0).getText();
            int marker = text.indexOf(NAME_MARKER);
            if (marker == -1) {
                continue;
            }
            int start = marker + NAME_MARKER.length();
            int end = text.lastIndexOf("/");
            // When the marker's own trailing slash is the last '/' in the text,
            // end < start and substring() would throw, discarding the whole
            // table; skip just this row instead. An empty identifier
            // (end == start) is skipped as well.
            if (end <= start) {
                continue;
            }
            if (freebaseQueryProxy == null) {
                throw new NullPointerException("freebaseQueryProxy must not be null when a cell contains \"" + NAME_MARKER + "\"");
            }
            // Raw types are kept on purpose: the generic signatures of the
            // proxy and of TCellAnnotation are not visible from this file.
            List candidates = freebaseQueryProxy.searchapi_getTopicsByNameAndType(text.substring(start, end).trim(), "any", false, 5, new String[0]);
            if (candidates != null && !candidates.isEmpty()) {
                // The text is read from column 0 but the annotation is stored on
                // column 1, as in the original code.
                tAnnotation.setContentCellAnnotations(row, 1, new TCellAnnotation[]{new TCellAnnotation(text, (Entity) candidates.get(0), 1.0d, new HashMap())});
            }
        }
        return tAnnotation;
    }

    /**
     * Writes the annotated table to the directory {@code str}.
     *
     * <p>The output base name is the part of the table's source id after its
     * last path separator ({@code '\'} is treated like {@code '/'}). Two outputs
     * are produced: whatever {@link TAnnotationWriter#writeHTML} writes for
     * {@code <str>/<name>}, and {@code <str>/<name>.keys}, which holds one
     * {@code row,column,id} line per annotated cell, using the id of the cell's
     * first annotation.
     *
     * @param table             the annotated table
     * @param tAnnotation       annotations of {@code table}
     * @param str               output directory
     * @param tAnnotationWriter writer used for the HTML output
     * @throws FileNotFoundException if an output file cannot be created
     */
    public void save(Table table, TAnnotation tAnnotation, String str, TAnnotationWriter tAnnotationWriter) throws FileNotFoundException {
        String fileName = table.getSourceId().replaceAll("\\\\", "/");
        int lastSlash = fileName.lastIndexOf("/");
        if (lastSlash != -1) {
            fileName = fileName.substring(lastSlash + 1).trim();
        }
        String target = str + File.separator + fileName;
        tAnnotationWriter.writeHTML(table, tAnnotation, target);
        // try-with-resources so the .keys file is closed even if reading the
        // annotations throws part-way through.
        try (PrintWriter keys = new PrintWriter(target + ".keys")) {
            for (int row = 0; row < table.getNumRows(); row++) {
                for (int col = 0; col < table.getNumCols(); col++) {
                    TCellAnnotation[] cell = tAnnotation.getContentCellAnnotations(row, col);
                    if (cell != null && cell.length > 0) {
                        keys.println(row + "," + col + "," + cell[0].getAnnotation().getId());
                    }
                }
            }
        }
    }
}
