/*
 * Decompiled with CFR 0.152.
 */
package de.digitalcollections.solrocr.formats.mini;

import com.google.common.collect.ImmutableMap;
import de.digitalcollections.solrocr.formats.OcrPassageFormatter;
import de.digitalcollections.solrocr.formats.mini.MiniOcrPassageFormatter;
import de.digitalcollections.solrocr.iter.ContextBreakIterator;
import de.digitalcollections.solrocr.iter.TagBreakIterator;
import de.digitalcollections.solrocr.lucene.filters.DehyphenatingHtmlCharFilterFactory;
import de.digitalcollections.solrocr.model.OcrBlock;
import de.digitalcollections.solrocr.model.OcrFormat;
import java.io.Reader;
import java.text.BreakIterator;
import java.util.Map;
import org.apache.lucene.analysis.util.CharFilterFactory;

/**
 * {@link OcrFormat} implementation for MiniOCR markup.
 *
 * <p>Each supported {@link OcrBlock} level is mapped to a single-letter tag name
 * ({@code p}, {@code s}, {@code b}, {@code l}, {@code w}); these tag names drive both
 * break-iterator construction and format detection.
 */
public class MiniOcrFormat implements OcrFormat {
    /** Factory for the character filter applied to raw OCR input in {@link #filter(Reader)}. */
    private static final CharFilterFactory filterFactory = new DehyphenatingHtmlCharFilterFactory();

    /**
     * Tag name used in the markup for each supported block level.
     *
     * <p>The arguments are passed without {@code Object} casts so that the call infers
     * {@code ImmutableMap<OcrBlock, String>}, matching the declared field type.
     */
    private static final Map<OcrBlock, String> blockTagMapping = ImmutableMap.of(
        OcrBlock.PAGE, "p",
        OcrBlock.SECTION, "s",
        OcrBlock.BLOCK, "b",
        OcrBlock.LINE, "l",
        OcrBlock.WORD, "w");

    /**
     * Builds a {@link ContextBreakIterator} that breaks on the tag mapped to {@code breakBlock}.
     *
     * @param breakBlock block level whose tag is used for the base {@link TagBreakIterator}
     * @param limitBlock block level whose tag is used for the limiting {@link TagBreakIterator};
     *     when {@code null} or not present in the tag mapping, {@code null} is passed as the
     *     limit iterator
     * @param contextSize passed through unchanged to the {@link ContextBreakIterator}
     * @return the newly constructed break iterator
     */
    @Override
    public BreakIterator getBreakIterator(OcrBlock breakBlock, OcrBlock limitBlock, int contextSize) {
        // NOTE(review): a breakBlock without a mapping yields a null tag here; whether
        // TagBreakIterator accepts that is not visible from this file - confirm.
        String breakTag = blockTagMapping.get(breakBlock);
        String limitTag = limitBlock == null ? null : blockTagMapping.get(limitBlock);
        TagBreakIterator limitIter = limitTag != null ? new TagBreakIterator(limitTag) : null;
        return new ContextBreakIterator(new TagBreakIterator(breakTag), limitIter, contextSize);
    }

    /**
     * Creates the passage formatter for this format.
     *
     * <p>All arguments are forwarded unchanged to the {@link MiniOcrPassageFormatter} constructor.
     *
     * @param prehHighlightTag first constructor argument of the formatter
     * @param postHighlightTag second constructor argument of the formatter
     * @param absoluteHighlights third constructor argument of the formatter
     * @param alignSpans fourth constructor argument of the formatter
     * @return a new {@link MiniOcrPassageFormatter}
     */
    @Override
    public OcrPassageFormatter getPassageFormatter(String prehHighlightTag, String postHighlightTag, boolean absoluteHighlights, boolean alignSpans) {
        return new MiniOcrPassageFormatter(prehHighlightTag, postHighlightTag, absoluteHighlights, alignSpans);
    }

    /**
     * Wraps {@code input} in the reader produced by the shared
     * {@link DehyphenatingHtmlCharFilterFactory}.
     *
     * @param input reader over the raw OCR markup
     * @return the reader returned by the filter factory
     */
    @Override
    public Reader filter(Reader input) {
        return filterFactory.create(input);
    }

    /**
     * Checks whether a chunk of text contains an opening tag of this format.
     *
     * <p>The check is a plain substring search: the chunk matches as soon as it contains
     * {@code "<t "} or {@code "<t>"} for any mapped tag name {@code t}.
     *
     * @param ocrChunk text to inspect; may be {@code null}
     * @return {@code true} if any mapped opening tag occurs in the chunk, {@code false}
     *     otherwise (including for a {@code null} chunk)
     */
    @Override
    public boolean hasFormat(String ocrChunk) {
        // A missing chunk cannot contain any markup; answer false rather than throwing.
        if (ocrChunk == null) {
            return false;
        }
        return blockTagMapping.values().stream()
            .anyMatch(t -> ocrChunk.contains("<" + t + " ") || ocrChunk.contains("<" + t + ">"));
    }
}

