package com.github.romualdrousseau.any2json.loader.pdf;

import com.github.romualdrousseau.any2json.base.PatcheableSheetStore;
import com.github.romualdrousseau.shuju.bigdata.DataFrame;
import com.github.romualdrousseau.shuju.bigdata.DataFrameWriter;
import com.github.romualdrousseau.shuju.bigdata.Row;
import com.github.romualdrousseau.shuju.strings.StringUtils;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import technology.tabula.ObjectExtractor;
import technology.tabula.Page;
import technology.tabula.PageIterator;
import technology.tabula.RectangularTextContainer;
import technology.tabula.Table;
import technology.tabula.TextElement;
import technology.tabula.extractors.BasicExtractionAlgorithm;
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:com/github/romualdrousseau/any2json/loader/pdf/PdfSheet.class */
/**
 * Sheet store backed by a PDF document.
 *
 * <p>The cell data is extracted lazily by {@link #ensureDataLoaded()}: every page of the PDF is
 * run through tabula and the resulting rows are written into a {@link DataFrame}. Pages that
 * tabula reports as tabular are read with the spreadsheet algorithm; every other page is read
 * line by line and rows that look like table rows are split into columns heuristically.
 *
 * <p>Until the data has been loaded successfully the sheet behaves as an empty sheet.
 */
public class PdfSheet extends PatcheableSheetStore implements Closeable {
    private static final System.Logger LOGGER = System.getLogger(PdfSheet.class.getName());

    private static final int BATCH_SIZE = 50000;
    private static final int MAX_COLUMNS = 100;
    /** Number of space widths used as the unit when measuring margins and gaps between text elements. */
    private static final int LATICE_SPACES = 3;
    /** Minimum left margin (in units of {@link #LATICE_SPACES} spaces) for a line to score as a table row. */
    private static final int LATICE_MARGINS = 1;
    /** Minimum number of wide gaps on a line for it to score as a table row. */
    private static final int LATICE_COLUMN_SEPARATORS = 4;
    /** Characters treated as blank when looking for column boundaries. */
    private static final String LATICE_SPACE_CHARS = " -_|";

    private final String name;
    private PDDocument reader;
    private DataFrame rows = null;

    /**
     * @param str the sheet name returned by {@link #getName()}
     * @param pDDocument the PDF document the rows are extracted from
     */
    public PdfSheet(String str, PDDocument pDDocument) {
        this.name = str;
        this.reader = pDDocument;
    }

    /**
     * Closes the loaded data frame (if any) and the PDF document (if still open).
     *
     * @throws IOException if closing either resource fails
     */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        try {
            if (this.rows != null) {
                this.rows.close();
            }
        } finally {
            // Always release the document, even when closing the data frame failed.
            if (this.reader != null) {
                this.reader.close();
            }
        }
    }

    /** @return the sheet name given at construction */
    public String getName() {
        return this.name;
    }

    /**
     * @param rowIndex index passed to {@code DataFrame.getColumnCount} (presumably a row index)
     * @return the index of the last column, or {@code -1} when no data is loaded
     */
    public int getLastColumnNum(int rowIndex) {
        if (this.rows == null) {
            return -1;
        }
        return this.rows.getColumnCount(rowIndex) - 1;
    }

    /** @return the index of the last row, or {@code -1} when no data is loaded */
    public int getLastRowNum() {
        if (this.rows == null) {
            return -1;
        }
        return this.rows.getRowCount() - 1;
    }

    /** @return {@code true} if the cell has either a patched value or an extracted value */
    public boolean hasCellDataAt(int colIndex, int rowIndex) {
        return getPatchCell(colIndex, rowIndex) != null || getCellAt(colIndex, rowIndex) != null;
    }

    /** @return the patched value when there is one, otherwise the cleaned extracted value */
    public String getCellDataAt(int colIndex, int rowIndex) {
        String patchCell = getPatchCell(colIndex, rowIndex);
        return patchCell != null ? patchCell : StringUtils.cleanToken(getCellAt(colIndex, rowIndex));
    }

    /** @return always {@code 1} */
    public int getNumberOfMergedCellsAt(int colIndex, int rowIndex) {
        return 1;
    }

    /**
     * Extracts the rows from the PDF document on first call; later calls are no-ops once the data
     * has been loaded. On success the PDF document is closed and released.
     *
     * <p>An {@link IOException} during extraction is logged and the sheet is left without data, so
     * this method never throws for I/O failures.
     *
     * @return this sheet
     */
    public PdfSheet ensureDataLoaded() {
        if (this.rows != null) {
            return this;
        }
        try (DataFrameWriter writer = new DataFrameWriter(BATCH_SIZE, MAX_COLUMNS)) {
            this.rows = processRows(this.reader, writer);
            this.reader.close();
            this.reader = null;
        } catch (IOException e) {
            LOGGER.log(System.Logger.Level.ERROR, "Failed to extract PDF sheet \"" + this.name + "\"", e);
        }
        return this;
    }

    /** Extracts every page of the document into the writer and returns the resulting data frame. */
    private DataFrame processRows(PDDocument document, DataFrameWriter writer) throws IOException {
        SpreadsheetExtractionAlgorithm spreadsheetAlgorithm = new SpreadsheetExtractionAlgorithm();
        BasicExtractionAlgorithm basicAlgorithm = new BasicExtractionAlgorithm();
        try (ObjectExtractor extractor = new ObjectExtractor(document)) {
            PageIterator pages = extractor.extract();
            while (pages.hasNext()) {
                Page page = pages.next();
                if (spreadsheetAlgorithm.isTabular(page)) {
                    processRowsTabular(spreadsheetAlgorithm, page, writer);
                } else {
                    processRowsLatice(basicAlgorithm, page, writer);
                }
                writeSeparator(writer);
            }
        }
        // Built after the extractor is closed, same order as before.
        return writer.getDataFrame();
    }

    /** Writes each table found by the spreadsheet algorithm, followed by a two-row separator. */
    @SuppressWarnings("rawtypes") // tabula exposes rows as lists of raw RectangularTextContainer
    private void processRowsTabular(SpreadsheetExtractionAlgorithm algorithm, Page page, DataFrameWriter writer) throws IOException {
        for (Table table : algorithm.extract(page)) {
            for (List<RectangularTextContainer> tableRow : table.getRows()) {
                List<String> cells = new ArrayList<>();
                for (RectangularTextContainer cell : tableRow) {
                    cells.add(StringUtils.cleanToken(cell.getText()));
                }
                writer.write(Row.of(cells.toArray(new String[0])));
            }
            writeSeparator(writer);
        }
    }

    /**
     * Reads a non-tabular page line by line. Lines classified as table rows are buffered and later
     * split into columns by {@link #processTableLatice}; every other non-empty line is written as a
     * single-cell row. A two-row separator is written when entering and when leaving a table.
     */
    @SuppressWarnings("rawtypes") // tabula exposes rows as lists of raw RectangularTextContainer
    private void processRowsLatice(BasicExtractionAlgorithm algorithm, Page page, DataFrameWriter writer) throws IOException {
        List<String> pendingTableRows = new ArrayList<>();
        for (Table table : algorithm.extract(page)) {
            boolean inTable = false;
            for (List<RectangularTextContainer> line : table.getRows()) {
                List<TextElement> elements = getElements(line);
                if (elements.isEmpty()) {
                    inTable = false;
                } else if (isTableRow(elements, inTable)) {
                    if (!inTable) {
                        writeSeparator(writer);
                    }
                    pendingTableRows.add(getTableRow(elements));
                    inTable = true;
                } else {
                    if (!pendingTableRows.isEmpty()) {
                        processTableLatice(pendingTableRows, writer);
                        pendingTableRows.clear();
                    }
                    if (inTable) {
                        writeSeparator(writer);
                    }
                    writer.write(Row.of(new String[]{StringUtils.cleanToken(getText(elements))}));
                    inTable = false;
                }
            }
        }
        // Flush a table that runs up to the end of the page.
        if (!pendingTableRows.isEmpty()) {
            processTableLatice(pendingTableRows, writer);
            pendingTableRows.clear();
        }
    }

    /**
     * Splits buffered fixed-width text lines into columns and writes one row per line.
     *
     * <p>A character position is a column boundary when every line is either shorter than that
     * position or has a blank character (see {@link #isLaticeSpace}) there. A run of adjacent
     * boundary positions is collapsed to its last position. Cells are the substrings between
     * consecutive boundaries.
     */
    private void processTableLatice(List<String> tableRows, DataFrameWriter writer) throws IOException {
        if (tableRows.isEmpty()) {
            return;
        }
        int width = tableRows.stream().mapToInt(String::length).max().getAsInt();

        List<Integer> boundaries = new ArrayList<>();
        int previousBoundary = -1;
        for (int col = 0; col < width; col++) {
            final int position = col;
            boolean blankInEveryRow = tableRows.stream()
                    .allMatch(line -> position >= line.length() || isLaticeSpace(line.charAt(position)));
            if (!blankInEveryRow) {
                continue;
            }
            if (previousBoundary >= 0 && position - previousBoundary == 1) {
                // Still inside the same run of blanks: keep only the latest position.
                boundaries.remove(boundaries.size() - 1);
            }
            boundaries.add(position);
            previousBoundary = position;
        }
        boundaries.add(width - 1);

        for (String line : tableRows) {
            List<String> cells = new ArrayList<>();
            for (int k = 0; k < boundaries.size() - 1; k++) {
                int start = boundaries.get(k);
                if (start >= line.length()) {
                    cells.add("");
                    continue;
                }
                int end = boundaries.get(k + 1);
                // When the next boundary is at or past the end of this line, take the remainder.
                String cell = end < line.length() - 1 ? line.substring(start, end) : line.substring(start);
                cells.add(StringUtils.cleanToken(cell));
            }
            writer.write(Row.of(cells.toArray(new String[0])));
        }
    }

    /** Writes the two blank rows used as a separator between blocks of extracted rows. */
    private void writeSeparator(DataFrameWriter writer) throws IOException {
        writer.write(Row.of(new String[]{""}));
        writer.write(Row.of(new String[]{""}));
    }

    /** Collects, in order, every {@link TextElement} contained in the given containers. */
    @SuppressWarnings("rawtypes") // tabula exposes rows as lists of raw RectangularTextContainer
    private List<TextElement> getElements(List<RectangularTextContainer> list) {
        List<TextElement> elements = new ArrayList<>();
        for (RectangularTextContainer container : list) {
            for (Object element : container.getTextElements()) {
                if (element instanceof TextElement) {
                    elements.add((TextElement) element);
                }
            }
        }
        return elements;
    }

    /**
     * Scores a line as a table row from two equally weighted signals: a left margin of at least
     * {@link #LATICE_MARGINS} units, and at least {@link #LATICE_COLUMN_SEPARATORS} gaps wider than
     * {@link #LATICE_SPACES} space widths between consecutive elements.
     *
     * @param list the text elements of the line; must not be empty
     * @param z whether the previous line was a table row; the first row of a table needs both
     *     signals, a following row needs only one
     */
    private boolean isTableRow(List<TextElement> list, boolean z) {
        TextElement first = list.get(0);
        int margin = (int) Math.floor(Math.max(first.getX() / first.getWidthOfSpace(), 0.0d) / LATICE_SPACES);

        int separators = 0;
        double previousX = first.getX();
        for (TextElement element : list) {
            double gap = ((element.getX() - previousX) / element.getWidthOfSpace()) - LATICE_SPACES;
            if (Math.max(gap, 0.0d) > 0.0d) {
                separators++;
            }
            previousX = element.getX();
        }

        double score = (0.5d * pRowMargin(margin)) + (0.5d * pRowSeparators(separators));
        return z ? score >= 0.5d : score == 1.0d;
    }

    /**
     * Renders a line as fixed-width text: each element is preceded by enough spaces to place it at
     * its horizontal position expressed in space widths (minus one).
     */
    private String getTableRow(List<TextElement> list) {
        StringBuilder line = new StringBuilder();
        for (TextElement element : list) {
            double padding = Math.max((element.getX() / element.getWidthOfSpace()) - 1.0d, 0.0d) - line.length();
            for (int i = 0; i < padding; i++) {
                line.append(' ');
            }
            line.append(element.getText());
        }
        return line.toString();
    }

    /** Concatenates the text of all elements without any separator. */
    private String getText(List<TextElement> list) {
        StringBuilder text = new StringBuilder();
        for (TextElement element : list) {
            text.append(element.getText());
        }
        return text.toString();
    }

    /** @return the extracted value at the given position, or {@code null} when out of range or not loaded */
    private String getCellAt(int colIndex, int rowIndex) {
        if (this.rows == null || rowIndex >= this.rows.getRowCount()) {
            return null;
        }
        Row row = this.rows.getRow(rowIndex);
        if (colIndex >= row.size()) {
            return null;
        }
        return row.get(colIndex);
    }

    private float pRowMargin(int margin) {
        return margin >= LATICE_MARGINS ? 1.0f : 0.0f;
    }

    private float pRowSeparators(int separators) {
        return separators >= LATICE_COLUMN_SEPARATORS ? 1.0f : 0.0f;
    }

    /** @return {@code true} for space, {@code '-'}, {@code '_'} and {@code '|'} */
    private boolean isLaticeSpace(char c) {
        return LATICE_SPACE_CHARS.indexOf(c) >= 0;
    }
}
