/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.chunk;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.NBestChunker;
import com.aliasi.chunk.RescoringChunker;
import com.aliasi.lm.LanguageModel;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Rescoring chunker that scores a chunking with character language models:
 * one "out" model for the text lying outside of chunks and one sequence
 * model per chunk type for the text inside chunks.
 *
 * <p>Each stretch of non-chunk text is scored together with a pair of
 * boundary characters that encode the type of the chunk before it and the
 * type of the chunk after it (or begin/end-of-text markers), so the out
 * model also sees the sequence of chunk types.
 *
 * @param <B> type of the underlying n-best chunker
 * @param <O> type of the language model used for non-chunk text
 * @param <C> type of the per-chunk-type language models
 */
public class AbstractCharLmRescoringChunker<B extends NBestChunker, O extends LanguageModel.Process, C extends LanguageModel.Sequence>
extends RescoringChunker<B> {
    /** Maps a chunk type name to the single character that encodes it for the out model. */
    final Map<String, Character> mTypeToChar;
    /** Maps a chunk type name to the language model used to score chunks of that type. */
    final Map<String, C> mTypeToLM;
    /** Language model used to score text outside of chunks. */
    final O mOutLM;
    /** Character returned by {@link #typeToChar(String)} for a type with no mapping. */
    static final char UNKNOWN_TYPE_CHAR = '\uffff';
    /** Boundary character used before the first stretch of non-chunk text. */
    static final char BOS_CHAR = '\ufffe';
    /** Boundary character used after the last stretch of non-chunk text. */
    static final char EOS_CHAR = '\ufffd';

    /**
     * Constructs a rescoring chunker from the given components. The maps are
     * stored by reference, not copied.
     *
     * @param baseNBestChunker underlying n-best chunker, passed to the superclass
     * @param numChunkingsRescored number of chunkings to rescore, passed to the superclass
     * @param outLM language model for text outside of chunks
     * @param typeToChar mapping from chunk type to its encoding character
     * @param typeToLM mapping from chunk type to its language model
     */
    public AbstractCharLmRescoringChunker(B baseNBestChunker, int numChunkingsRescored, O outLM, Map<String, Character> typeToChar, Map<String, C> typeToLM) {
        super(baseNBestChunker, numChunkingsRescored);
        this.mOutLM = outLM;
        this.mTypeToChar = typeToChar;
        this.mTypeToLM = typeToLM;
    }

    /**
     * Returns the character that encodes the given chunk type.
     *
     * @param chunkType chunk type to look up
     * @return the mapped character, or {@code UNKNOWN_TYPE_CHAR} if the type has no mapping
     */
    public char typeToChar(String chunkType) {
        Character mapped = this.mTypeToChar.get(chunkType);
        return mapped == null ? UNKNOWN_TYPE_CHAR : mapped.charValue();
    }

    /**
     * Returns the language model used for text outside of chunks.
     *
     * @return the out language model
     */
    public O outLM() {
        return this.mOutLM;
    }

    /**
     * Returns the language model registered for the given chunk type.
     *
     * @param chunkType chunk type to look up
     * @return the model for that type, or {@code null} if none is registered
     */
    public C chunkLM(String chunkType) {
        return this.mTypeToLM.get(chunkType);
    }

    /**
     * Scores a chunking as the sum of the out-model estimates for every
     * stretch of text between chunks and the type-model estimates for the
     * text of every chunk, visiting chunks in text order.
     *
     * <p>A chunk whose type has no registered model is reported on standard
     * output and scored with the fallback of {@link #typeLMEstimate}.
     *
     * <p>NOTE(review): this walk assumes chunks do not overlap; a chunk that
     * starts before the previous one ends would make
     * {@code substring(pos, start)} throw — confirm callers never supply
     * overlapping chunkings.
     *
     * @param chunking chunking to score
     * @return the summed estimate for the chunking
     */
    @Override
    public double rescore(Chunking chunking) {
        String text = chunking.charSequence().toString();
        double logProb = 0.0;
        int pos = 0;
        char prevTagChar = BOS_CHAR;
        for (Chunk chunk : AbstractCharLmRescoringChunker.orderedSet(chunking)) {
            int start = chunk.start();
            int end = chunk.end();
            String chunkType = chunk.type();
            char tagChar = this.typeToChar(chunkType);
            // Text between the previous chunk (or start of text) and this chunk.
            logProb += this.outLMEstimate(text.substring(pos, start), prevTagChar, tagChar);
            if (this.mTypeToLM.get(chunkType) == null) {
                System.out.println("\nFound null lm for type=" + chunkType + " Full type set =" + this.mTypeToLM.keySet());
                System.out.println("Chunking=" + chunking);
            }
            logProb += this.typeLMEstimate(chunkType, text.substring(start, end));
            pos = end;
            prevTagChar = tagChar;
        }
        // Trailing text after the last chunk, closed with the end-of-text marker.
        return logProb + this.outLMEstimate(text.substring(pos), prevTagChar, EOS_CHAR);
    }

    /**
     * Estimates the given chunk text with the model registered for the type.
     *
     * @param type chunk type whose model is used
     * @param text text of the chunk
     * @return the model's {@code log2Estimate} of the text; if no model is
     *     registered for the type, a message is printed to standard output and
     *     a fixed penalty of {@code -16.0} per character is returned
     */
    double typeLMEstimate(String type, String text) {
        C lm = this.mTypeToLM.get(type);
        if (lm == null) {
            String msg = "Found null lm for type=" + type + " Full type set =" + this.mTypeToLM.keySet();
            System.out.println("TypeLM Estimate:\n" + msg);
            return -16.0 * (double)text.length();
        }
        return lm.log2Estimate(text);
    }

    /**
     * Estimates a stretch of non-chunk text wrapped in its boundary characters.
     *
     * <p>The out model's estimate of the leading boundary character alone is
     * subtracted from its estimate of the whole wrapped sequence, so the
     * leading character contributes context but not its own cost.
     *
     * @param text non-chunk text
     * @param prevTagChar character encoding what precedes the text
     * @param nextTagChar character encoding what follows the text
     * @return estimate of the wrapped sequence minus the estimate of its first character
     */
    double outLMEstimate(String text, char prevTagChar, char nextTagChar) {
        String prefix = String.valueOf(prevTagChar);
        String seq = prefix + text + nextTagChar;
        return this.mOutLM.log2Estimate(seq) - this.mOutLM.log2Estimate(prefix);
    }

    /**
     * Returns a new array holding the given text with one boundary character
     * prepended and one appended.
     *
     * @param text text to wrap
     * @param prevTagChar character placed at index 0
     * @param nextTagChar character placed at the last index
     * @return array of length {@code text.length() + 2}
     */
    static char[] wrapText(String text, char prevTagChar, char nextTagChar) {
        int length = text.length();
        char[] cs = new char[length + 2];
        cs[0] = prevTagChar;
        text.getChars(0, length, cs, 1);
        cs[length + 1] = nextTagChar;
        return cs;
    }

    /**
     * Returns the chunks of the given chunking in a new set sorted by
     * {@code Chunk.TEXT_ORDER_COMPARATOR}.
     *
     * @param chunking chunking whose chunk set is copied
     * @return a new sorted set containing the chunking's chunks
     */
    static Set<Chunk> orderedSet(Chunking chunking) {
        TreeSet<Chunk> orderedChunkSet = new TreeSet<Chunk>(Chunk.TEXT_ORDER_COMPARATOR);
        orderedChunkSet.addAll(chunking.chunkSet());
        return orderedChunkSet;
    }
}

