/*
 * Decompiled with CFR 0.152.
 */
package de.datexis.parvec.encoder;

import de.datexis.common.WordHelpers;
import de.datexis.model.Sentence;
import de.datexis.model.Span;
import de.datexis.model.Token;
import de.datexis.parvec.encoder.ParVecEncoder;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Encoder that represents a {@link Span} through the word vectors stored in the
 * underlying model (looked up via {@code model.getWordVectorMatrix(...)}).
 *
 * <ul>
 *   <li>{@link Sentence}: the mean of the vectors of all words that the model knows.</li>
 *   <li>{@link Token}: the vector of the token's (preprocessed) text.</li>
 *   <li>any other span: delegated to {@code encode(String)} with the span's text.</li>
 * </ul>
 *
 * Sentence and Token results are created as / transposed into arrays of shape
 * {@code getEmbeddingVectorSize() x 1}; words unknown to the model contribute nothing
 * and a span without any known word yields an all-zero array of that shape.
 */
public class ParVecWordsEncoder
extends ParVecEncoder {
    protected static final Logger log = LoggerFactory.getLogger(ParVecWordsEncoder.class);

    /**
     * @return the size of the vectors produced by this encoder, taken from {@code layerSize}
     */
    @Override
    public long getEmbeddingVectorSize() {
        return this.layerSize;
    }

    /**
     * Encodes a span using the model's word vectors.
     *
     * @param span the span to encode; must not be {@code null}
     * @return for a {@link Sentence} the average of all known word vectors, for a
     *         {@link Token} its word vector (zeros in both cases if nothing is known),
     *         otherwise the result of {@code encode(span.getText())}
     */
    @Override
    public INDArray encode(Span span) {
        if (span instanceof Sentence) {
            // Line breaks and tabs are mapped to placeholder words before the lookup.
            // The patterns are plain literals, so the non-regex replace() is sufficient.
            String text = ((Sentence) span).toTokenizedString().trim()
                    .replace("\n", " *NL* ")
                    .replace("\t", " *t* ");
            INDArray sum = Nd4j.zeros(this.getEmbeddingVectorSize(), 1L);
            int len = 0;
            for (String w : WordHelpers.splitSpaces(text)) {
                if (w.trim().isEmpty()) {
                    continue;
                }
                INDArray arr = this.model.getWordVectorMatrix(preprocessor.preProcess(w));
                if (arr == null) {
                    // word is not known to the model: skip it and do not count it
                    continue;
                }
                // NOTE(review): the lookup presumably yields a 1 x size row; it is
                // transposed to match the size x 1 accumulator.
                sum.addi(arr.transpose());
                ++len;
            }
            // avoid a division by zero when no word could be looked up
            return len == 0 ? sum : sum.div(len);
        }
        if (span instanceof Token) {
            INDArray arr = this.model.getWordVectorMatrix(preprocessor.preProcess(((Token) span).getText()));
            if (arr != null) {
                // Transposed like in the Sentence branch, so that a known token has the
                // same orientation as the zero fallback below and as a Sentence result.
                return arr.transpose();
            }
            return Nd4j.zeros(this.getEmbeddingVectorSize(), 1L);
        }
        return this.encode(span.getText());
    }
}

