/*
 * Decompiled with CFR 0.152.
 */
package de.citec.scie.pdf;

import de.citec.scie.pdf.StringSimilarity;
import de.citec.scie.pdf.structure.Document;
import de.citec.scie.pdf.structure.Page;
import de.citec.scie.pdf.structure.TextBlock;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;

/**
 * Removes short text blocks that recur, in similar form, on several pages of a
 * {@link Document} (presumably running headers, footers and the like -- the
 * code itself only looks at string similarity between small blocks).
 *
 * <p>The document is modified in place: matching blocks are removed from their
 * pages and pages that end up without any block are removed from the document.
 */
public class DocumentBlockCleaner {
    /**
     * Blocks whose string form has at least this many characters are never
     * considered for removal and never count as a match.
     */
    public static final int SMALLBLOCKSIZE = 200;
    /**
     * A block is only removed when the best retained match on a later page has
     * a confidence strictly greater than this value.
     */
    public static final double REMOVETHRESHOLD = 0.7;

    /**
     * Scans every page for small blocks that have a sufficiently similar small
     * block on later pages, removes the block and its retained matches, and
     * finally drops all pages that contain no blocks.
     *
     * @param doc the document to clean; its pages and their block lists are
     *            mutated in place
     */
    public void blockCleanup(Document doc) {
        // Cache of TextBlock -> toString() so later-page blocks are converted only once.
        HashMap<TextBlock, String> convertedBlocks = new HashMap<TextBlock, String>();
        ArrayList<Page> pagesToRemove = new ArrayList<Page>();
        for (int pageIdx = 0; pageIdx < doc.content.size(); ++pageIdx) {
            ArrayList<TextBlock> currentBlocks = doc.content.get(pageIdx).content;
            // Removal from the current page is deferred so the list is not
            // modified while it is being iterated.
            ArrayList<TextBlock> blocksToRemove = new ArrayList<TextBlock>();
            for (TextBlock block : currentBlocks) {
                String blockString = block.toString();
                if (blockString.length() >= SMALLBLOCKSIZE) {
                    continue;
                }
                FittingBlock[] fittingBlocks = this.findBestMatches(doc, blockString, convertedBlocks, pageIdx);
                this.optimize(fittingBlocks);
                if (!(DocumentBlockCleaner.highestConfidence(fittingBlocks) > REMOVETHRESHOLD)) {
                    continue;
                }
                // Entry i belongs to page pageIdx + 1 + i; these are other pages,
                // so removing from them does not disturb the current iteration.
                for (int i = 0; i < fittingBlocks.length; ++i) {
                    FittingBlock fitting = fittingBlocks[i];
                    if (fitting == null || fitting.getMatch() == null) {
                        continue;
                    }
                    doc.content.get(pageIdx + 1 + i).content.remove(fitting.getMatch());
                }
                blocksToRemove.add(block);
            }
            for (TextBlock blockToRemove : blocksToRemove) {
                currentBlocks.remove(blockToRemove);
            }
            if (currentBlocks.isEmpty()) {
                pagesToRemove.add(doc.content.get(pageIdx));
            }
        }
        // Pages are removed only after the scan so page indices stay stable above.
        for (Page page : pagesToRemove) {
            doc.content.remove(page);
        }
    }

    /**
     * Returns the largest confidence among the non-null entries, or
     * {@code 0.0} when there is none (or none is positive).
     */
    private static double highestConfidence(FittingBlock[] fittingBlocks) {
        double maxConfidence = 0.0;
        for (FittingBlock fitting : fittingBlocks) {
            if (fitting != null && fitting.getMatchConfidence() > maxConfidence) {
                maxConfidence = fitting.getMatchConfidence();
            }
        }
        return maxConfidence;
    }

    /**
     * For every page after {@code startPageIdx}, finds the small block that is
     * most similar to {@code blockString}.
     *
     * @param doc             the document whose later pages are searched
     * @param blockString     string form of the block being matched
     * @param convertedBlocks cache of block string forms; filled as a side effect
     * @param startPageIdx    index of the page that contains the block
     * @return one entry per later page (index {@code 0} is page
     *         {@code startPageIdx + 1}); a page without any small block scoring
     *         above zero yields an entry with confidence {@code 0.0} and a
     *         {@code null} match. No array element is {@code null}.
     */
    private FittingBlock[] findBestMatches(Document doc, String blockString, HashMap<TextBlock, String> convertedBlocks, int startPageIdx) {
        StringSimilarity simAlgo = new StringSimilarity();
        int firstOtherPage = startPageIdx + 1;
        FittingBlock[] fittingBlocks = new FittingBlock[doc.content.size() - firstOtherPage];
        for (int otherPageIdx = firstOtherPage; otherPageIdx < doc.content.size(); ++otherPageIdx) {
            double maxConfidence = 0.0;
            TextBlock maxBlock = null;
            for (TextBlock otherBlock : doc.content.get(otherPageIdx).content) {
                String otherString = convertedBlocks.get(otherBlock);
                if (otherString == null) {
                    otherString = otherBlock.toString();
                    convertedBlocks.put(otherBlock, otherString);
                }
                if (otherString.length() >= SMALLBLOCKSIZE) {
                    continue;
                }
                // NOTE(review): presumably a similarity score in [0, 1] -- confirm
                // against StringSimilarity.
                double confidence = simAlgo.calculate(blockString, otherString);
                if (confidence > maxConfidence) {
                    maxConfidence = confidence;
                    maxBlock = otherBlock;
                }
            }
            fittingBlocks[otherPageIdx - firstOtherPage] = new FittingBlock(maxConfidence, maxBlock);
        }
        return fittingBlocks;
    }

    /**
     * Greedily discards the lowest-confidence entries (by setting them to
     * {@code null}) for as long as doing so strictly increases the score
     * {@code remainingPages * product(remaining confidences)}.
     *
     * <p>The product of the remaining confidences is taken from precomputed
     * suffix products over the sorted entries rather than by dividing the
     * discarded confidence out of a running product. With division, a single
     * zero-confidence page (a page without any match) turned the running
     * product into {@code 0.0 / 0.0 = NaN}; every comparison with NaN is
     * false, so nothing was ever discarded and low-confidence matches on other
     * pages were kept -- and subsequently deleted from the document.
     *
     * @param fittingBlocks per-page best matches without {@code null} elements;
     *                      discarded entries are set to {@code null} in place
     */
    private void optimize(FittingBlock[] fittingBlocks) {
        int pageCount = fittingBlocks.length;
        // Sorted copy, lowest confidence first; the sort is stable, so ties
        // keep their page order.
        FittingBlock[] ascending = fittingBlocks.clone();
        Arrays.sort(ascending);
        // suffixProduct[k] = product of the confidences of ascending[k..pageCount-1].
        double[] suffixProduct = new double[pageCount + 1];
        suffixProduct[pageCount] = 1.0;
        for (int k = pageCount - 1; k >= 0; --k) {
            suffixProduct[k] = suffixProduct[k + 1] * ascending[k].getMatchConfidence();
        }
        double optimum = (double) pageCount * suffixProduct[0];
        for (int dropped = 0; dropped < pageCount; ++dropped) {
            // Score if ascending[0..dropped] are all discarded.
            double candidate = suffixProduct[dropped + 1] * (double) (pageCount - dropped - 1);
            if (!(candidate > optimum)) {
                break;
            }
            optimum = candidate;
            DocumentBlockCleaner.discard(fittingBlocks, ascending[dropped]);
        }
    }

    /**
     * Sets the array slot holding exactly {@code target} (compared by
     * identity) to {@code null}.
     */
    private static void discard(FittingBlock[] fittingBlocks, FittingBlock target) {
        for (int i = 0; i < fittingBlocks.length; ++i) {
            if (fittingBlocks[i] == target) {
                fittingBlocks[i] = null;
                return;
            }
        }
    }

    /**
     * The best match found on one page together with its confidence. Ordered
     * by ascending confidence; {@code equals} is not overridden, so the
     * ordering is not consistent with equals.
     */
    private static class FittingBlock
    implements Comparable<FittingBlock> {
        private final double matchConfidence;
        private final TextBlock match;

        /**
         * @param matchConfidence confidence of the match
         * @param match           the matching block, or {@code null} if the page had none
         */
        public FittingBlock(double matchConfidence, TextBlock match) {
            this.matchConfidence = matchConfidence;
            this.match = match;
        }

        /** Returns the matching block, or {@code null} if there is none. */
        public TextBlock getMatch() {
            return this.match;
        }

        /** Returns the confidence of the match. */
        public double getMatchConfidence() {
            return this.matchConfidence;
        }

        @Override
        public int compareTo(FittingBlock o) {
            return Double.compare(this.matchConfidence, o.matchConfidence);
        }
    }
}

