package org.apache.joshua.decoder.ff.lm;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.primitives.Ints;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Support;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.ff.StatefulFF;
import org.apache.joshua.decoder.ff.lm.berkeley_lm.LMGrammarBerkeley;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.util.FormatUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:joshua-incubating-6.1.jar:org/apache/joshua/decoder/ff/lm/LanguageModelFF.class */
/**
 * Stateful feature function that scores hypotheses with an n-gram language model.
 *
 * <p>Each instance is named {@code lm_<index>} using the static {@link #LM_INDEX} counter, which
 * is incremented on every construction. The backend is chosen by the {@code lm_type} argument
 * ({@code kenlm} or {@code berkeleylm}). An optional {@code class_map} argument switches the
 * feature to score class ids instead of word ids, and an optional {@code oov_feature} argument
 * enables a second dense feature that counts out-of-vocabulary terminals.
 */
public class LanguageModelFF extends StatefulFF {
    static final Logger LOG = LoggerFactory.getLogger(LanguageModelFF.class);

    /** Running index used to name successive language model features; see {@link #resetLmIndex()}. */
    public static int LM_INDEX = 0;

    /** Vocabulary id of the {@code <s>} token, resolved in {@link #initializeLM()}. */
    private int startSymbolId;

    protected NGramLanguageModel languageModel;
    protected static final String NAME_PREFIX = "lm_";
    protected static final String OOV_SUFFIX = "_oov";
    protected final String oovFeatureName;
    protected final int ngramOrder;
    protected final float weight;
    protected final float oovWeight;
    protected String type;
    protected final String path;
    protected boolean isClassLM;
    private ClassMap classMap;
    protected boolean withOovFeature;

    /** Dense index of the OOV feature; stays -1 until {@link #reportDenseFeatures(int)} is called. */
    protected int oovDenseFeatureIndex = -1;

    /**
     * Builds the feature from its parsed arguments and loads the language model.
     *
     * @param featureVector weights; the entries for this feature's name and its OOV name are read
     * @param strArr raw feature arguments, forwarded to the superclass for parsing
     * @param joshuaConfiguration decoder configuration, used to resolve the {@code lm_file} path
     * @throws NumberFormatException if {@code lm_order} is missing or not an integer
     * @throws RuntimeException if {@code lm_type} is not a supported backend
     */
    public LanguageModelFF(FeatureVector featureVector, String[] strArr, JoshuaConfiguration joshuaConfiguration) {
        super(featureVector, NAME_PREFIX + LM_INDEX, strArr, joshuaConfiguration);
        this.oovFeatureName = NAME_PREFIX + LM_INDEX + OOV_SUFFIX;
        LM_INDEX++;

        this.type = this.parsedArgs.get("lm_type");
        this.ngramOrder = Integer.parseInt(this.parsedArgs.get("lm_order"));
        this.path = joshuaConfiguration.getFilePath(this.parsedArgs.get("lm_file"));

        if (this.parsedArgs.containsKey("class_map")) {
            this.isClassLM = true;
            this.classMap = new ClassMap(this.parsedArgs.get("class_map"));
        }
        if (this.parsedArgs.containsKey("oov_feature")) {
            this.withOovFeature = true;
        }

        this.weight = featureVector.getSparse(this.name);
        this.oovWeight = featureVector.getSparse(this.oovFeatureName);
        initializeLM();
    }

    /**
     * Records the dense feature index assigned to this feature and reports the dense feature names.
     *
     * <p>The OOV index is always set to {@code i + 1}, but the OOV name is only reported when the
     * OOV feature is enabled.
     *
     * @param i dense index assigned to the main language model feature
     * @return the feature name, followed by the OOV feature name when enabled
     */
    @Override // org.apache.joshua.decoder.ff.FeatureFunction
    public ArrayList<String> reportDenseFeatures(int i) {
        this.denseFeatureIndex = i;
        this.oovDenseFeatureIndex = this.denseFeatureIndex + 1;

        ArrayList<String> names = new ArrayList<>(2);
        names.add(this.name);
        if (this.withOovFeature) {
            names.add(this.oovFeatureName);
        }
        return names;
    }

    /**
     * Instantiates the backend selected by {@link #type}, registers it with the vocabulary and
     * caches the id of the {@code <s>} symbol.
     *
     * @throws RuntimeException if {@link #type} is neither {@code kenlm} nor {@code berkeleylm}
     *     (including when it is {@code null})
     */
    protected void initializeLM() {
        if ("kenlm".equals(this.type)) {
            this.languageModel = new KenLM(this.ngramOrder, this.path);
        } else if ("berkeleylm".equals(this.type)) {
            this.languageModel = new LMGrammarBerkeley(this.ngramOrder, this.path);
        } else {
            throw new RuntimeException(
                    String.format("* FATAL: Invalid backend lm_type '%s' for LanguageModel", this.type)
                            + "\n"
                            + "*        Permissible values for 'lm_type' are 'kenlm' and 'berkeleylm'");
        }

        Vocabulary.registerLanguageModel(this.languageModel);
        // Return value intentionally unused; presumably this ensures the default nonterminal has a
        // vocabulary id once the model is registered -- TODO confirm.
        Vocabulary.id(this.config.default_non_terminal);
        this.startSymbolId = Vocabulary.id("<s>");
    }

    /** @return the underlying language model backend */
    public NGramLanguageModel getLM() {
        return this.languageModel;
    }

    /** @return {@code true} if a {@code class_map} argument was supplied at construction */
    public boolean isClassLM() {
        return this.isClassLM;
    }

    /** @return a one-line description with the feature name, model order, weight and class-LM flag */
    @Override // org.apache.joshua.decoder.ff.FeatureFunction
    public String logString() {
        return String.format("%s, order %d (weight %.3f), classLm=%s",
                this.name, this.languageModel.getOrder(), this.weight, this.isClassLM);
    }

    /**
     * Scores the application of a rule and produces the resulting n-gram state.
     *
     * @param rule the applied rule; when {@code null} nothing is scored
     * @param list tail nodes supplying the states of the rule's nonterminals
     * @param i start offset, used only when source annotations are enabled
     * @param i2 end offset, passed through to {@link #getTags}
     * @param sourcePath unused here
     * @param sentence the input sentence, used only when source annotations are enabled
     * @param accumulator receives the OOV count (if enabled) and the language model score
     * @return the new n-gram state, or {@code null} when {@code rule} is {@code null}
     */
    @Override // org.apache.joshua.decoder.ff.StatefulFF, org.apache.joshua.decoder.ff.FeatureFunction
    public DPState compute(Rule rule, List<HGNode> list, int i, int i2, SourcePath sourcePath, Sentence sentence, FeatureFunction.Accumulator accumulator) {
        if (rule == null) {
            return null;
        }

        int[] tokens;
        if (this.config.source_annotations) {
            tokens = getTags(rule, i, i2, sentence);
        } else {
            tokens = getRuleIds(rule);
        }

        if (this.withOovFeature) {
            accumulator.add(this.oovDenseFeatureIndex, getOovs(tokens));
        }
        return computeTransition(tokens, list, accumulator);
    }

    /**
     * Returns the target-side ids to score for a rule.
     *
     * @param rule the rule whose target side is read
     * @return class ids when this is a class LM, otherwise the rule's own target-side array
     */
    @VisibleForTesting
    public int[] getRuleIds(Rule rule) {
        if (this.isClassLM) {
            return getClasses(rule);
        }
        return rule.getEnglish();
    }

    /**
     * Counts the ids that are not nonterminals and that the language model reports as OOV.
     *
     * @param iArr ids to inspect
     * @return number of out-of-vocabulary terminals
     */
    @VisibleForTesting
    public int getOovs(int[] iArr) {
        int oovCount = 0;
        for (int id : iArr) {
            if (FormatUtils.isNonterminal(id)) {
                continue;
            }
            if (this.languageModel.isOov(id)) {
                oovCount++;
            }
        }
        return oovCount;
    }

    /**
     * Returns a copy of the rule's target side in which positive ids may be replaced by the id of
     * a {@code "class"} annotation taken from the sentence.
     *
     * @param rule the rule whose target side and alignment are read; the rule is not modified
     * @param i offset added to the alignment value when looking up the annotation
     * @param i2 unused
     * @param sentence source of the annotations
     * @return the (possibly rewritten) copy of the target side
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public int[] getTags(Rule rule, int i, int i2, Sentence sentence) {
        // Work on a copy so the rule's own array is left untouched.
        int[] tokens = Arrays.copyOf(rule.getEnglish(), rule.getEnglish().length);
        byte[] alignment = rule.getAlignment();
        if (alignment == null) {
            return tokens;
        }

        for (int pos = 0; pos < tokens.length; pos++) {
            if (tokens[pos] <= 0) {
                // Only positive ids are rewritten.
                continue;
            }
            // Alignment entries are visited two at a time.
            for (int slot = 0; slot < alignment.length; slot += 2) {
                if (alignment[slot] != pos) {
                    continue;
                }
                // NOTE(review): the lookup indexes alignment[pos], not alignment[slot] or
                // alignment[slot + 1]; this can read outside the array when pos >= alignment.length
                // and looks suspicious. Behavior is preserved here -- confirm the intended index
                // against the alignment layout.
                String annotation = sentence.getAnnotation(alignment[pos] + i, "class");
                if (annotation != null) {
                    tokens[pos] = Vocabulary.id(annotation);
                    break;
                }
            }
        }
        return tokens;
    }

    /**
     * Replaces the class map with one built from the given argument.
     *
     * @param str argument handed to the {@code ClassMap} constructor
     * @throws IOException declared for callers; not raised by the code visible here
     */
    public void setClassMap(String str) throws IOException {
        this.classMap = new ClassMap(str);
    }

    /**
     * Returns a copy of the rule's target side with every positive id mapped through the class map.
     *
     * @param rule the rule whose target side is read; the rule is not modified
     * @return the mapped copy
     * @throws RuntimeException if no class map has been set
     */
    protected int[] getClasses(Rule rule) {
        if (this.classMap == null) {
            throw new RuntimeException("The class map is not set. Cannot use the class LM ");
        }
        int[] tokens = Arrays.copyOf(rule.getEnglish(), rule.getEnglish().length);
        for (int pos = 0; pos < tokens.length; pos++) {
            int id = tokens[pos];
            if (id > 0) {
                tokens[pos] = this.classMap.getClassID(id);
            }
        }
        return tokens;
    }

    /**
     * Scores the final transition for a node using its stored n-gram state.
     *
     * @return a new state carrying the same left and right context as the node's state
     */
    @Override // org.apache.joshua.decoder.ff.StatefulFF, org.apache.joshua.decoder.ff.FeatureFunction
    public DPState computeFinal(HGNode hGNode, int i, int i2, SourcePath sourcePath, Sentence sentence, FeatureFunction.Accumulator accumulator) {
        NgramDPState state = (NgramDPState) hGNode.getDPState(this.stateIndex);
        return computeFinalTransition(state, accumulator);
    }

    /**
     * Estimates the weighted cost of a rule without any tail-node context.
     *
     * <p>The target side is split at nonterminals and each run of terminals is scored on its own.
     * Only the first run can be flagged as starting with {@code <s>}. A rule with an empty target
     * side contributes no language model score.
     *
     * @param rule the rule to estimate
     * @return weighted language model estimate plus the weighted OOV term
     */
    @Override // org.apache.joshua.decoder.ff.FeatureFunction
    public float estimateCost(Rule rule) {
        float f = 0.0f;
        int[] ruleIds = getRuleIds(rule);
        List<Integer> chunk = new ArrayList<>();
        // Guard against an empty target side before peeking at the first id.
        boolean skipStart = ruleIds.length > 0 && ruleIds[0] == this.startSymbolId;

        for (int id : ruleIds) {
            if (FormatUtils.isNonterminal(id)) {
                f += scoreChunkLogP(chunk, true, skipStart);
                chunk.clear();
                skipStart = false;
            } else {
                chunk.add(id);
            }
        }

        // NOTE(review): ArpaNgram.DEFAULT_BACKOFF is kept exactly as found; it may be a constant the
        // decompiler substituted for a literal zero -- confirm.
        return (this.weight * (f + scoreChunkLogP(chunk, true, skipStart))) + (this.oovWeight * (this.withOovFeature ? getOovs(ruleIds) : ArpaNgram.DEFAULT_BACKOFF));
    }

    /**
     * Estimates the weighted future cost from the left context of a state.
     *
     * @param rule unused
     * @param dPState must be an {@link NgramDPState}
     * @param sentence unused
     * @return weighted estimate; {@code 0} times the weight when the state has no left context
     */
    @Override // org.apache.joshua.decoder.ff.StatefulFF, org.apache.joshua.decoder.ff.FeatureFunction
    public float estimateFutureCost(Rule rule, DPState dPState, Sentence sentence) {
        float estimate = 0.0f;
        int[] leftContext = ((NgramDPState) dPState).getLeftLMStateWords();
        if (leftContext != null) {
            // Guard against an empty left context before peeking at the first id.
            boolean skipStart = leftContext.length > 0 && leftContext[0] == this.startSymbolId;
            // NOTE(review): ArpaNgram.DEFAULT_BACKOFF is kept exactly as found; it may be a constant
            // the decompiler substituted for a literal zero -- confirm.
            estimate = ArpaNgram.DEFAULT_BACKOFF + scoreChunkLogP(leftContext, true, skipStart);
        }
        return this.weight * estimate;
    }

    /**
     * Walks the target side, splicing in the stored contexts of nonterminals, scores every full
     * n-gram that is formed, and builds the resulting state.
     *
     * @param iArr target-side ids (terminals and nonterminals)
     * @param list tail nodes; a nonterminal id {@code t} selects element {@code -(t + 1)}
     * @param accumulator receives the summed log probability under {@code denseFeatureIndex}
     * @return the new left/right context state
     */
    private NgramDPState computeTransition(int[] iArr, List<HGNode> list, FeatureFunction.Accumulator accumulator) {
        // Two buffers are swapped whenever the window is shifted left by one word.
        int[] current = new int[this.ngramOrder];
        int[] shadow = new int[this.ngramOrder];
        int count = 0;
        float logP = 0.0f;
        // Captured once, the first time the window holds ngramOrder - 1 words.
        int[] leftContext = null;

        for (int token : iArr) {
            if (FormatUtils.isNonterminal(token)) {
                NgramDPState tailState = (NgramDPState) list.get(-(token + 1)).getDPState(this.stateIndex);
                int[] tailLeft = tailState.getLeftLMStateWords();
                int[] tailRight = tailState.getRightLMStateWords();
                for (int word : tailLeft) {
                    int slot = count;
                    count++;
                    current[slot] = word;
                    if (leftContext == null && count == this.ngramOrder - 1) {
                        leftContext = Arrays.copyOf(current, count);
                    }
                    if (count == this.ngramOrder) {
                        logP += this.languageModel.ngramLogProbability(current, this.ngramOrder);
                        // Shift the window left by one word via the spare buffer, then swap.
                        System.arraycopy(current, 1, shadow, 0, this.ngramOrder - 1);
                        int[] swap = current;
                        current = shadow;
                        shadow = swap;
                        count--;
                    }
                }
                // Overwrite the tail of the window with the nonterminal's right context.
                System.arraycopy(tailRight, 0, current, count - tailRight.length, tailRight.length);
            } else {
                int slot = count;
                count++;
                current[slot] = token;
                if (leftContext == null && count == this.ngramOrder - 1) {
                    leftContext = Arrays.copyOf(current, count);
                }
                if (count == this.ngramOrder) {
                    logP += this.languageModel.ngramLogProbability(current, this.ngramOrder);
                    // Shift the window left by one word via the spare buffer, then swap.
                    System.arraycopy(current, 1, shadow, 0, this.ngramOrder - 1);
                    int[] swap = current;
                    current = shadow;
                    shadow = swap;
                    count--;
                }
            }
        }

        accumulator.add(this.denseFeatureIndex, logP);

        if (leftContext != null) {
            return new NgramDPState(leftContext, Arrays.copyOfRange(current, (count - this.ngramOrder) + 1, count));
        }
        // Fewer than ngramOrder - 1 words were seen: the same array serves as both contexts.
        int[] shortContext = Arrays.copyOf(current, count);
        return new NgramDPState(shortContext, shortContext);
    }

    /**
     * Scores the left context of a state, starting from bigrams, with a window capped below the
     * model order.
     *
     * @param ngramDPState state whose contexts are read
     * @param accumulator receives the summed log probability under {@code denseFeatureIndex}
     * @return a new state with the same left and right context arrays
     */
    private NgramDPState computeFinalTransition(NgramDPState ngramDPState, FeatureFunction.Accumulator accumulator) {
        float logP = 0.0f;
        LinkedList<Integer> window = new LinkedList<>();
        int[] leftContext = ngramDPState.getLeftLMStateWords();
        int[] rightContext = ngramDPState.getRightLMStateWords();

        for (int word : leftContext) {
            window.add(word);
            // Unigrams are not scored here.
            if (window.size() >= 2) {
                logP += this.languageModel.ngramLogProbability(Support.toArray(window), window.size());
            }
            if (window.size() == this.ngramOrder) {
                window.removeFirst();
            }
        }

        accumulator.add(this.denseFeatureIndex, logP);
        return new NgramDPState(leftContext, rightContext);
    }

    /** List-based convenience overload of {@link #scoreChunkLogP(int[], boolean, boolean)}. */
    private float scoreChunkLogP(List<Integer> list, boolean z, boolean z2) {
        return scoreChunkLogP(Ints.toArray(list), z, z2);
    }

    /**
     * Scores a run of ids with the language model's sentence scorer.
     *
     * @param iArr ids to score; an empty array scores {@code 0}
     * @param z when {@code false}, the start index passed to the scorer is the model order
     * @param z2 when {@code z} is {@code true}, selects start index 2 ({@code true}) or 1
     *     ({@code false})
     * @return the log probability reported by the language model, or {@code 0} for empty input
     */
    private float scoreChunkLogP(int[] iArr, boolean z, boolean z2) {
        if (iArr.length == 0) {
            return 0.0f;
        }
        int startIndex;
        if (!z) {
            startIndex = this.ngramOrder;
        } else if (z2) {
            startIndex = 2;
        } else {
            startIndex = 1;
        }
        return this.languageModel.sentenceLogProbability(iArr, this.ngramOrder, startIndex);
    }

    /** Resets the static counter used to name language model features back to zero. */
    public static void resetLmIndex() {
        LM_INDEX = 0;
    }
}
