/*
 * Decompiled with CFR 0.152.
 */
package de.digitalcollections.solrocr.formats;

import com.google.common.collect.ImmutableSet;
import de.digitalcollections.solrocr.iter.IterableCharSequence;
import de.digitalcollections.solrocr.model.OcrBox;
import de.digitalcollections.solrocr.model.OcrPage;
import de.digitalcollections.solrocr.model.OcrSnippet;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.lucene.search.uhighlight.Passage;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class OcrPassageFormatter
extends PassageFormatter {
    /**
     * Matches an alphanumeric character that is directly followed by the start of a closing tag
     * ({@code </}). Used by {@link #getHighlightedFragment} to locate the end of the last piece of
     * text inside a match that ends in markup.
     */
    protected static final Pattern LAST_INNER_TAG_PAT = Pattern.compile("[a-zA-Z0-9]</");
    /**
     * Matches a complete {@code <title>...</title>} element (non-greedy). These elements are removed
     * by {@link #getTextFromXml} before the remaining markup is stripped.
     */
    protected static final Pattern TITLE_PAT = Pattern.compile("<title>.*?</title>");
    private static final Logger logger = LoggerFactory.getLogger(OcrPassageFormatter.class);
    /** Markup inserted in front of every (merged) match in the highlighted fragment. */
    protected final String startHlTag;
    /** Markup inserted behind every (merged) match in the highlighted fragment. */
    protected final String endHlTag;
    /**
     * If {@code false}, the coordinates of highlighted boxes are made relative to the upper left
     * corner of the snippet region that contains them; if {@code true} they are left untouched.
     */
    protected final boolean absoluteHighlights;
    /**
     * If {@code true}, the highlighting tags are moved to the nearest tag boundaries instead of
     * being placed exactly at the match offsets (see {@link #getHighlightedFragment}).
     */
    protected final boolean alignSpans;

    /**
     * Stores the formatter configuration; no validation is performed.
     *
     * @param startHlTag markup to insert before a match. NOTE(review): {@link #getTextFromXml}
     *     strips the first and last character of this value to obtain a tag name, so it is
     *     presumably expected to look like {@code <em>} - confirm with callers.
     * @param endHlTag markup to insert after a match
     * @param absoluteHighlights whether highlight coordinates stay absolute instead of being made
     *     relative to their snippet region
     * @param alignSpans whether highlight tags are aligned to tag boundaries in the OCR markup
     */
    protected OcrPassageFormatter(String startHlTag, String endHlTag, boolean absoluteHighlights, boolean alignSpans) {
        this.startHlTag = startHlTag;
        this.endHlTag = endHlTag;
        this.absoluteHighlights = absoluteHighlights;
        this.alignSpans = alignSpans;
    }

    /**
     * Builds the list of matches for a passage, with overlapping matches merged into one.
     *
     * <p>The matches are ordered by their start offset before merging, since the merge loop only
     * ever compares a candidate against the most recently emitted match. The sort is stable, so
     * input that is already ordered is processed exactly as given.
     *
     * @param numMatches number of valid entries in {@code matchStarts} and {@code matchEnds}
     * @param matchStarts start offsets of the matches
     * @param matchEnds end offsets of the matches
     * @return a new, mutable list of non-overlapping matches in ascending start order; empty if
     *     {@code numMatches} is not positive
     */
    protected List<PassageMatch> mergeMatches(int numMatches, int[] matchStarts, int[] matchEnds) {
        if (numMatches <= 0) {
            // Nothing to merge; previously this case failed with a NoSuchElementException.
            return new ArrayList<>();
        }
        List<PassageMatch> sortedMatches = IntStream.range(0, numMatches)
                .mapToObj(idx -> new PassageMatch(matchStarts[idx], matchEnds[idx]))
                .sorted((a, b) -> Integer.compare(a.start, b.start))
                .collect(Collectors.toList());
        Deque<PassageMatch> mergedMatches = new ArrayDeque<>();
        for (PassageMatch candidate : sortedMatches) {
            PassageMatch last = mergedMatches.peekLast();
            if (last != null && last.overlaps(candidate)) {
                last.merge(candidate);
            } else {
                mergedMatches.add(candidate);
            }
        }
        return new ArrayList<>(mergedMatches);
    }

    /**
     * Formats every passage into an OCR snippet.
     *
     * <p>A passage whose formatting fails with an {@link IndexOutOfBoundsException} is logged and
     * leaves its slot in the result as {@code null}; the remaining passages are still processed.
     *
     * @param passages passages to format
     * @param content content the passage offsets refer to
     * @return an array with the same length as {@code passages}; entries may be {@code null}
     */
    public OcrSnippet[] format(Passage[] passages, IterableCharSequence content) {
        final OcrSnippet[] result = new OcrSnippet[passages.length];
        for (int idx = 0; idx < passages.length; idx++) {
            final Passage current = passages[idx];
            try {
                result[idx] = this.format(current, content);
            } catch (IndexOutOfBoundsException e) {
                logger.error(
                        String.format(
                                "Could not create snippet (start=%d, end=%d) from content at '%s' due to an out-of-bounds error.\n\nDoes the file on disk correspond to the document that was used during indexing?",
                                current.getStartOffset(), current.getEndOffset(), content.getIdentifier()),
                        e);
            }
        }
        return result;
    }

    /**
     * Returns the part of {@code content} covered by the passage, with {@link #startHlTag} and
     * {@link #endHlTag} inserted around every (merged) match.
     *
     * <p>Match offsets are translated into positions inside the fragment by measuring the length
     * of the decoded sub-sequence between the passage start and the match offset, so the offsets
     * are not assumed to be character indices of the fragment string.
     *
     * @param passage passage with its start/end offsets and match offsets
     * @param content content the offsets refer to
     * @return the fragment with highlighting markup inserted
     * @throws IndexOutOfBoundsException if the offsets do not fit the content
     */
    protected String getHighlightedFragment(Passage passage, IterableCharSequence content) {
        int passageStart = passage.getStartOffset();
        StringBuilder sb = new StringBuilder(content.subSequence(passageStart, passage.getEndOffset()));
        if (passage.getNumMatches() <= 0) {
            return sb.toString();
        }

        // Number of characters inserted so far; shifts all positions derived from the content.
        int extraChars = 0;
        List<PassageMatch> matches = this.mergeMatches(passage.getNumMatches(), passage.getMatchStarts(), passage.getMatchEnds());
        for (PassageMatch match : matches) {
            String preMatchContent = content.subSequence(passageStart, match.start).toString();
            // With aligned spans the highlight starts right after the last tag preceding the match
            // (position 0 if there is none).
            int matchStart = this.alignSpans
                    ? preMatchContent.lastIndexOf(">") + 1
                    : preMatchContent.length();
            sb.insert(extraChars + matchStart, this.startHlTag);
            extraChars += this.startHlTag.length();

            int matchEnd = content.subSequence(passageStart, match.end).toString().length();
            String matchText = sb.substring(extraChars + matchStart, extraChars + matchEnd);
            if (matchText.trim().endsWith(">")) {
                // The match ends in markup: pull the end back to just after the last alphanumeric
                // character that is directly followed by a closing tag.
                Matcher m = LAST_INNER_TAG_PAT.matcher(matchText);
                int idx = -1;
                while (m.find()) {
                    idx = m.start() + 1;
                }
                if (idx > -1) {
                    matchEnd -= matchText.length() - idx;
                }
            }
            matchEnd = Math.min(matchEnd + extraChars, sb.length());
            if (this.alignSpans && matchEnd != sb.length()) {
                // Extend the highlight up to the next closing tag. If there is none, keep the
                // position as it is; the previous code subtracted 1 in that case because it added
                // the -1 returned by indexOf.
                int closingTagIdx = sb.indexOf("</", matchEnd);
                if (closingTagIdx >= 0) {
                    matchEnd = closingTagIdx;
                }
            }
            sb.insert(matchEnd, this.endHlTag);
            extraChars += this.endHlTag.length();
        }
        return sb.toString();
    }

    /**
     * Formats a single passage: builds the highlighted fragment, determines its start page,
     * parses the fragment into a snippet and copies the passage score onto it.
     *
     * @return the snippet, or {@code null} if the fragment could not be parsed into one
     */
    private OcrSnippet format(Passage passage, IterableCharSequence content) {
        String xmlFragment = this.getHighlightedFragment(passage, content);
        OcrPage page = this.determineStartPage(xmlFragment, passage.getStartOffset(), content);
        OcrSnippet snip = this.parseFragment(xmlFragment, page);
        if (snip == null) {
            return null;
        }
        snip.setScore(passage.getScore());
        return snip;
    }

    /**
     * Reduces an OCR markup fragment to plain text while keeping the highlighting tags.
     *
     * <p>{@code <title>} elements are removed first, then all markup except the highlighting tag
     * is stripped, XML entities are unescaped and whitespace is normalized (newlines removed,
     * runs of whitespace collapsed to a single space, result trimmed).
     *
     * @param xmlFragment fragment with highlighting markup
     * @return the plain text with highlighting tags, or {@code xmlFragment} unchanged if reading
     *     from the strip filter fails
     */
    protected String getTextFromXml(String xmlFragment) {
        String withoutTitles = TITLE_PAT.matcher(xmlFragment).replaceAll("");
        // Name of the highlighting tag without the surrounding angle brackets; tags with this
        // name are passed to the filter as the set of tags that must not be stripped.
        Set<String> preservedTags = ImmutableSet.of(this.startHlTag.substring(1, this.startHlTag.length() - 1));
        try (Reader filter = new HTMLStripCharFilter(new StringReader(withoutTitles), preservedTags)) {
            String text = IOUtils.toString(filter);
            return StringEscapeUtils.unescapeXml(text).replaceAll("\n", "").replaceAll("\\s+", " ").trim();
        } catch (IOException e) {
            // Best effort: fall back to the raw fragment, but leave a trace of the failure.
            logger.warn("Could not strip markup from OCR fragment, returning it unmodified.", e);
            return xmlFragment;
        }
    }

    /**
     * Determines the page that is passed to {@link #parseFragment} together with the fragment.
     *
     * <p>Within this class it is called with the highlighted fragment, the start offset of the
     * passage and the full content. NOTE(review): presumably the result is the page on which the
     * passage starts - confirm against the implementations.
     *
     * @param var1 the highlighted OCR fragment
     * @param var2 the start offset of the passage in the content
     * @param var3 the content the passage was taken from
     * @return the page for the fragment; {@link #parseFragment} reads its {@code id} without a
     *     null check
     */
    public abstract OcrPage determineStartPage(String var1, int var2, IterableCharSequence var3);

    /**
     * Parses a highlighted OCR fragment into a snippet with its regions, pages and highlights.
     *
     * <p>The word boxes of the fragment are grouped into columns (a new group starts on a page
     * change or when a box lies far above the previous reference box), one region is computed per
     * group, and consecutive highlighted boxes sharing the same highlight span are collected and
     * attached to the snippet.
     *
     * @param ocrFragment OCR markup with highlighting tags
     * @param page page passed on to {@link #parseWords} (via its id) and used as a candidate page
     *     of the snippet
     * @return the snippet, or {@code null} if the fragment contains no word boxes
     */
    protected OcrSnippet parseFragment(String ocrFragment, OcrPage page) {
        TreeMap<Integer, OcrPage> pages = this.parsePages(ocrFragment);
        List<OcrBox> allBoxes = this.parseWords(ocrFragment, pages, page.id);
        if (allBoxes.isEmpty()) {
            return null;
        }

        // Group the boxes into columns.
        List<List<OcrBox>> columns = new ArrayList<>();
        List<OcrBox> column = new ArrayList<>();
        OcrBox referenceBox = null;
        String lastPageId = null;
        for (OcrBox box : allBoxes) {
            // A box lying more than five reference-box heights above the reference box starts a
            // new column.
            boolean startsNewColumn = referenceBox != null
                    && box.getUly() + referenceBox.getHeight() * 5.0f < referenceBox.getUly();
            boolean startsNewPage = lastPageId != null && !box.getPageId().equals(lastPageId);
            if (startsNewColumn || startsNewPage) {
                columns.add(column);
                column = new ArrayList<>();
            }
            column.add(box);
            // Only boxes taller than 5 units become the reference for the column check.
            if (box.getHeight() > 5.0f) {
                referenceBox = box;
            }
            lastPageId = box.getPageId();
        }
        columns.add(column);

        // Collect runs of consecutive highlighted boxes that belong to the same highlight span.
        List<List<OcrBox>> hlSpans = new ArrayList<>();
        List<OcrBox> openSpan = null;
        for (OcrBox wordBox : allBoxes) {
            boolean hasOpenSpan = openSpan != null && !openSpan.isEmpty();
            if (!wordBox.isInHighlight()) {
                if (hasOpenSpan) {
                    hlSpans.add(openSpan);
                    openSpan = null;
                }
            } else {
                if (!hasOpenSpan
                        || !wordBox.getHighlightSpan().equals(openSpan.get(0).getHighlightSpan())) {
                    if (hasOpenSpan) {
                        hlSpans.add(openSpan);
                    }
                    openSpan = new ArrayList<>();
                }
                openSpan.add(wordBox);
            }
        }
        if (openSpan != null && !openSpan.isEmpty()) {
            hlSpans.add(openSpan);
        }

        String highlightedText = this.getTextFromXml(ocrFragment);

        // One region per column; regions without any non-whitespace text are dropped.
        List<OcrBox> snippetRegions = new ArrayList<>();
        for (List<OcrBox> columnBoxes : columns) {
            OcrBox region = this.determineSnippetRegion(columnBoxes);
            if (!region.getText().trim().isEmpty()) {
                snippetRegions.add(region);
            }
        }

        // Keep only those candidate pages that actually carry a region.
        Set<String> snippetPageIds = snippetRegions.stream()
                .map(OcrBox::getPageId)
                .collect(Collectors.toSet());
        List<OcrPage> candidatePages = new ArrayList<>();
        candidatePages.add(page);
        candidatePages.addAll(pages.values());
        List<OcrPage> snippetPages = candidatePages.stream()
                .filter(p -> snippetPageIds.contains(p.id))
                .distinct()
                .collect(Collectors.toList());

        OcrSnippet snip = new OcrSnippet(highlightedText, snippetPages, snippetRegions);
        this.addHighlightsToSnippet(hlSpans, snip);
        return snip;
    }

    /**
     * Computes the region that encloses all given word boxes and carries their joined text.
     *
     * <p>The text is the space-separated text of the boxes; for hyphenated words only the box
     * marked as hyphen start contributes, using its dehyphenated form. If the first/last box is
     * part of a highlight but does not itself contain the start/end tag, the tag is added to the
     * region text so the markup stays balanced.
     *
     * @param wordBoxes non-empty list of word boxes; the page id is taken from the first box
     * @return a new box spanning all word boxes
     * @throws java.util.NoSuchElementException if {@code wordBoxes} is empty
     */
    private OcrBox determineSnippetRegion(List<OcrBox> wordBoxes) {
        Iterator<OcrBox> boxes = wordBoxes.iterator();
        // Fails with NoSuchElementException on an empty list.
        OcrBox firstBox = boxes.next();
        OcrBox lastBox = firstBox;
        float minX = firstBox.getUlx();
        float minY = firstBox.getUly();
        float maxX = firstBox.getLrx();
        float maxY = firstBox.getLry();
        while (boxes.hasNext()) {
            OcrBox box = boxes.next();
            // Float.compare gives the same ordering as Float::compareTo.
            if (Float.compare(box.getUlx(), minX) < 0) {
                minX = box.getUlx();
            }
            if (Float.compare(box.getUly(), minY) < 0) {
                minY = box.getUly();
            }
            if (Float.compare(box.getLrx(), maxX) > 0) {
                maxX = box.getLrx();
            }
            if (Float.compare(box.getLry(), maxY) > 0) {
                maxY = box.getLry();
            }
            lastBox = box;
        }

        String regionText = wordBoxes.stream()
                .filter(box -> !box.isHyphenated() || box.getHyphenStart())
                .map(box -> box.isHyphenated() ? box.getDehyphenatedForm() : box.getText())
                .collect(Collectors.joining(" "));
        if (firstBox.isInHighlight() && !firstBox.getText().contains(this.startHlTag)) {
            regionText = this.startHlTag + regionText;
        }
        if (lastBox.isInHighlight() && !lastBox.getText().contains(this.endHlTag)) {
            regionText = regionText + this.endHlTag;
        }
        return new OcrBox(regionText, firstBox.getPageId(), minX, minY, maxX, maxY, null);
    }

    /**
     * Parses the word boxes contained in an OCR fragment.
     *
     * <p>{@link #parseFragment} treats an empty result as "no snippet" and returns {@code null}.
     *
     * @param var1 the highlighted OCR fragment
     * @param var2 the pages found in the fragment, as returned by {@link #parsePages}
     * @param var3 the id of the page determined for the fragment by {@link #determineStartPage}
     * @return the word boxes of the fragment
     */
    protected abstract List<OcrBox> parseWords(String var1, TreeMap<Integer, OcrPage> var2, String var3);

    /**
     * Parses the pages referenced in an OCR fragment.
     *
     * @param var1 the highlighted OCR fragment
     * @return the pages found in the fragment. NOTE(review): the integer key is presumably the
     *     position of the page in the fragment - confirm against the implementations.
     */
    protected abstract TreeMap<Integer, OcrPage> parsePages(String var1);

    /**
     * Prepares the highlighted boxes and attaches the highlight spans to the snippet.
     *
     * <p>For every highlighted box that lies inside one of the snippet regions, the coordinates
     * are made relative to that region (unless {@link #absoluteHighlights} is set), the index of
     * the region is recorded and the highlighting tags are removed from the box text. Boxes that
     * are in no region are left untouched. Afterwards every span is merged with
     * {@link #mergeBoxes} and added to the snippet.
     *
     * @param hlSpans highlight spans, each a list of word boxes; the boxes are modified in place
     * @param snippet snippet that provides the regions and receives the spans
     */
    protected void addHighlightsToSnippet(List<List<OcrBox>> hlSpans, OcrSnippet snippet) {
        // All boxes are prepared before any span is merged, since merging mutates the boxes.
        for (List<OcrBox> span : hlSpans) {
            for (OcrBox box : span) {
                List<OcrBox> regions = snippet.getSnippetRegions();
                Optional<OcrBox> containing = regions.stream().filter(r -> r.contains(box)).findFirst();
                if (!containing.isPresent()) {
                    continue;
                }
                OcrBox region = containing.get();
                if (!this.absoluteHighlights) {
                    float xOffset = region.getUlx();
                    float yOffset = region.getUly();
                    box.setUlx(box.getUlx() - xOffset);
                    box.setLrx(box.getLrx() - xOffset);
                    box.setUly(box.getUly() - yOffset);
                    box.setLry(box.getLry() - yOffset);
                }
                box.setParentRegionIdx(regions.indexOf(region));
                // Literal replacement: the tags are configuration values, not regular expressions.
                box.setText(box.getText().replace(this.startHlTag, "").replace(this.endHlTag, ""));
            }
        }
        for (List<OcrBox> span : hlSpans) {
            snippet.addHighlightSpan(this.mergeBoxes(span));
        }
    }

    /**
     * Merges neighbouring boxes on the same line and page into a single box.
     *
     * <p>Starting with the first box, every following box is either merged into the current box
     * (text appended with a space, right/lower/upper edge extended) or, if it is on a new line or
     * a different page, starts a new current box. A box counts as being on a new line if its upper
     * edge differs from the current box's by more than 75% of the current box's height.
     *
     * <p>The boxes are modified in place. Lists with fewer than two boxes are returned as they
     * are.
     *
     * @param boxes boxes of one highlight span, in reading order
     * @return the merged boxes
     */
    protected List<OcrBox> mergeBoxes(List<OcrBox> boxes) {
        if (boxes.size() < 2) {
            return boxes;
        }
        List<OcrBox> merged = new ArrayList<>();
        OcrBox current = boxes.get(0);
        StringBuilder text = new StringBuilder(current.getText());
        for (OcrBox candidate : boxes.subList(1, boxes.size())) {
            float lineHeight = current.getLry() - current.getUly();
            float yDiff = Math.abs(candidate.getUly() - current.getUly());
            boolean newLine = yDiff > 0.75 * lineHeight;
            boolean newPage = !StringUtils.equals(candidate.getPageId(), current.getPageId());
            if (newLine || newPage) {
                // Finish the current box and continue with the candidate.
                current.setText(text.toString());
                merged.add(current);
                current = candidate;
                text = new StringBuilder(current.getText());
            } else {
                text.append(" ");
                text.append(candidate.getText());
                if (candidate.getLrx() > current.getLrx()) {
                    current.setLrx(candidate.getLrx());
                }
                if (candidate.getLry() > current.getLry()) {
                    current.setLry(candidate.getLry());
                }
                if (candidate.getUly() < current.getUly()) {
                    current.setUly(candidate.getUly());
                }
            }
        }
        current.setText(text.toString());
        // NOTE(review): the page id is cleared on the last box only; boxes finished earlier in the
        // loop keep theirs. Confirm whether this asymmetry is intended.
        current.setPageId(null);
        merged.add(current);
        return merged;
    }

    /**
     * Formats the passages against plain string content and returns the snippet texts.
     *
     * <p>Snippets can be missing (a passage that failed with an out-of-bounds error or a fragment
     * without word boxes yields {@code null}); such entries are returned as {@code null} texts so
     * that the result stays aligned with {@code passages} instead of failing with a
     * {@link NullPointerException}.
     *
     * @param passages passages to format
     * @param content content the passage offsets refer to
     * @return a {@code String[]} with one entry per passage; entries may be {@code null}
     */
    @Override
    public Object format(Passage[] passages, String content) {
        OcrSnippet[] snips = this.format(passages, IterableCharSequence.fromString(content));
        return Arrays.stream(snips)
                .map(snip -> snip == null ? null : snip.getText())
                .toArray(String[]::new);
    }

    /**
     * A match inside a passage, described by its start and end offset. Both offsets are treated
     * as inclusive when checking for overlaps, so matches that merely touch count as overlapping.
     */
    protected static class PassageMatch {
        /** Start offset of the match. */
        public int start;
        /** End offset of the match. */
        public int end;

        /**
         * @param start start offset of the match
         * @param end end offset of the match, expected to be {@code >= start}
         */
        public PassageMatch(int start, int end) {
            this.start = start;
            this.end = end;
        }

        /**
         * Checks whether this match and {@code other} share at least one offset.
         *
         * @param other match to compare with
         * @return {@code true} if the two (closed) ranges intersect or touch
         */
        public boolean overlaps(PassageMatch other) {
            // Two closed ranges intersect iff each one starts before the other one ends.
            return this.start <= other.end && other.start <= this.end;
        }

        /**
         * Extends this match so that it also covers {@code other}.
         *
         * <p>Both bounds are adjusted independently; a match that reaches beyond this one on both
         * sides therefore widens the start as well as the end.
         *
         * @param other match to absorb
         */
        public void merge(PassageMatch other) {
            this.start = Math.min(this.start, other.start);
            this.end = Math.max(this.end, other.end);
        }

        @Override
        public String toString() {
            return String.format("PassageMatch{start=%d, end=%d}", this.start, this.end);
        }
    }
}

