/*
 * Decompiled with CFR 0.152.
 */
package de.digitalcollections.solrocr.formats.hocr;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import de.digitalcollections.solrocr.formats.OcrPassageFormatter;
import de.digitalcollections.solrocr.formats.hocr.HocrClassBreakIterator;
import de.digitalcollections.solrocr.formats.hocr.HocrPassageFormatter;
import de.digitalcollections.solrocr.iter.ContextBreakIterator;
import de.digitalcollections.solrocr.lucene.filters.DehyphenatingHtmlCharFilterFactory;
import de.digitalcollections.solrocr.model.OcrBlock;
import de.digitalcollections.solrocr.model.OcrFormat;
import java.io.Reader;
import java.text.BreakIterator;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
import org.apache.lucene.analysis.util.CharFilterFactory;

public class HocrFormat
implements OcrFormat {
    private static final CharFilterFactory baseFilterFactory = new DehyphenatingHtmlCharFilterFactory();
    private static final Map<OcrBlock, Set<String>> blockClassMapping = ImmutableMap.builder().put((Object)OcrBlock.PAGE, (Object)ImmutableSet.of((Object)"ocr_page")).put((Object)OcrBlock.BLOCK, (Object)ImmutableSet.of((Object)"ocr_carea", (Object)"ocrx_block")).put((Object)OcrBlock.SECTION, (Object)ImmutableSet.of((Object)"ocr_chapter", (Object)"ocr_section", (Object)"ocr_subsection", (Object)"ocr_subsubsection")).put((Object)OcrBlock.PARAGRAPH, (Object)ImmutableSet.of((Object)"ocr_par")).put((Object)OcrBlock.LINE, (Object)ImmutableSet.of((Object)"ocr_line", (Object)"ocrx_line")).put((Object)OcrBlock.WORD, (Object)ImmutableSet.of((Object)"ocrx_word")).build();
    private static final Pattern TITLE_PAT = Pattern.compile("<title>.*?</title>");

    @Override
    public BreakIterator getBreakIterator(OcrBlock breakBlock, OcrBlock limitBlock, int contextSize) {
        Set<String> breakClasses = blockClassMapping.get((Object)breakBlock);
        Set<String> limitClasses = limitBlock == null ? null : blockClassMapping.get((Object)limitBlock);
        return new ContextBreakIterator(new HocrClassBreakIterator(breakClasses), limitClasses != null ? new HocrClassBreakIterator(limitClasses) : null, contextSize);
    }

    @Override
    public OcrPassageFormatter getPassageFormatter(String prehHighlightTag, String postHighlightTag, boolean absoluteHighlights, boolean alignSpans) {
        return new HocrPassageFormatter(prehHighlightTag, postHighlightTag, absoluteHighlights, alignSpans);
    }

    @Override
    public Reader filter(Reader input) {
        Reader filtered = baseFilterFactory.create(input);
        return new PatternReplaceCharFilter(TITLE_PAT, "", filtered);
    }

    @Override
    public boolean hasFormat(String ocrChunk) {
        return blockClassMapping.values().stream().flatMap(Collection::stream).anyMatch(ocrChunk::contains);
    }
}

