/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.icrawl.contentanalysis;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import de.l3s.icrawl.contentanalysis.DocumentVector;
import de.l3s.icrawl.contentanalysis.LanguageModel;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Holds one {@link LanguageModel} per {@link Locale} together with a default
 * language, and delegates document-vector, matcher and similarity operations
 * to the model registered for the requested language.
 *
 * <p>Lookups fall back to the model of the default language when no model is
 * registered for the requested one (see {@link #getLanguageModel(Locale)}).
 */
public class LanguageModels {
    private static final Logger logger = LoggerFactory.getLogger(LanguageModels.class);

    /** Separates the term column from the value column in an IDF dictionary line. */
    private static final Pattern IDF_COLUMN_SEPARATOR = Pattern.compile("\\s+");

    /** Buffer size (in chars) used when reading IDF dictionaries. */
    private static final int IDF_READER_BUFFER_SIZE = 8048;

    private final Locale defaultLanguage;
    private final Map<Locale, LanguageModel> models;

    /**
     * Creates an instance holding a single model for {@code language}, built
     * from the analyzer for that language and the given IDF dictionary.
     *
     * <p>NOTE(review): {@code defaultLanguage} is accepted but not used;
     * {@code language} becomes the default language. Since the only registered
     * model is the one for {@code language}, using a different default would
     * leave the fallback lookup without a model. Confirm the intent with the
     * callers before changing this.
     *
     * @param language the language of the single model; also used as default
     * @param idfDictionary term to IDF value mapping passed to the model
     * @param defaultLanguage currently ignored
     * @throws IllegalArgumentException if no analyzer exists for {@code language}
     */
    public LanguageModels(Locale language, Map<String, Double> idfDictionary, Locale defaultLanguage) {
        this(language, ImmutableMap.of(language,
                new LanguageModel(LanguageModels.getAnalyzerForLanguage(language), idfDictionary)));
    }

    /**
     * Creates an instance from an existing set of models.
     *
     * @param language the default language used when a lookup finds no model
     * @param modelsMap models keyed by locale; the map is stored as given, not copied
     */
    public LanguageModels(Locale language, Map<Locale, LanguageModel> modelsMap) {
        this.models = modelsMap;
        this.defaultLanguage = language;
    }

    /**
     * @return the language whose model is used as fallback
     */
    public Locale getDefaultLanguage() {
        return this.defaultLanguage;
    }

    /**
     * Builds a document vector for {@code doc} with the model for
     * {@code language} and returns its cosine similarity to {@code reference},
     * as computed by {@link DocumentVector#cosineSimilarity}.
     *
     * @param language language used to select the model
     * @param doc the document text; must not be empty
     * @param reference the vector to compare against
     * @param matcher keyword matcher handed to the model when building the vector
     * @return the similarity, or {@code 0.0} if the computed value is NaN or infinite
     * @throws IllegalArgumentException if {@code doc} is empty or no model is available
     */
    public double getSimilarity(Locale language, String doc, DocumentVector reference, LanguageModel.KeywordMatcher matcher) {
        Preconditions.checkArgument(!doc.isEmpty(), "Document must have length > 0.");
        LanguageModel model = this.getLanguageModel(language);
        DocumentVector documentVector = model.buildDocumentVector(doc, matcher);
        double documentSimilarity = reference.cosineSimilarity(documentVector);
        logger.trace("result: {}", documentSimilarity);
        if (Double.isInfinite(documentSimilarity) || Double.isNaN(documentSimilarity)) {
            // Non-finite results are reported as "no similarity" instead of being propagated.
            logger.debug("Got NaN similarity for input '{}'@{}: {}", doc, language, documentSimilarity);
            return 0.0;
        }
        return documentSimilarity;
    }

    /**
     * Returns the model registered for {@code language}, or the model of the
     * default language if there is none.
     *
     * @param language the requested language
     * @return the matching or fallback model, never {@code null}
     * @throws IllegalArgumentException if neither lookup yields a model
     */
    LanguageModel getLanguageModel(Locale language) {
        LanguageModel model = this.models.get(language);
        if (model == null) {
            model = this.models.get(this.defaultLanguage);
        }
        if (model == null) {
            throw new IllegalArgumentException("Could not find model for language " + language);
        }
        return model;
    }

    /**
     * Creates a new Lucene analyzer for the given language. Only
     * {@link Locale#GERMAN}, {@link Locale#ENGLISH}, {@link Locale#ITALIAN}
     * and {@link Locale#FRENCH} are recognized, and the comparison is an exact
     * {@code equals} on the locale (a locale with a country, such as
     * {@code Locale.GERMANY}, does not match).
     *
     * <p>The German analyzer is constructed with {@link CharArraySet#EMPTY_SET};
     * the other analyzers use their no-argument constructors.
     *
     * @param lang the language to create an analyzer for
     * @return a new analyzer instance
     * @throws IllegalArgumentException if {@code lang} is none of the recognized locales
     */
    static Analyzer getAnalyzerForLanguage(Locale lang) {
        if (Locale.GERMAN.equals(lang)) {
            return new GermanAnalyzer(CharArraySet.EMPTY_SET);
        }
        if (Locale.ENGLISH.equals(lang)) {
            return new EnglishAnalyzer();
        }
        if (Locale.ITALIAN.equals(lang)) {
            return new ItalianAnalyzer();
        }
        if (Locale.FRENCH.equals(lang)) {
            return new FrenchAnalyzer();
        }
        throw new IllegalArgumentException("Could not find model for language " + lang);
    }

    /**
     * Delegates to {@code buildMatcher} of the model selected for {@code lang}.
     *
     * @param lang language used to select the model
     * @param keywords keywords passed to the model
     * @param ngramSize n-gram size passed to the model
     * @return the matcher built by the model
     * @throws IllegalArgumentException if no model is available
     */
    public LanguageModel.KeywordMatcher buildMatcher(Locale lang, Iterable<String> keywords, int ngramSize) {
        return this.getLanguageModel(lang).buildMatcher(keywords, ngramSize);
    }

    /**
     * Delegates to {@code buildDocumentVector} of the model selected for {@code language}.
     *
     * @param language language used to select the model
     * @param document the document text
     * @param keywordMatcher keyword matcher passed to the model
     * @return the vector built by the model
     * @throws IllegalArgumentException if no model is available
     */
    public DocumentVector buildDocumentVector(Locale language, String document, LanguageModel.KeywordMatcher keywordMatcher) {
        return this.getLanguageModel(language).buildDocumentVector(document, keywordMatcher);
    }

    /**
     * Reads an IDF dictionary from a UTF-8 text stream. Each non-blank line is
     * split at the first run of whitespace into a term and a value, and the
     * value is parsed with {@link Double#valueOf(String)}. Blank lines are
     * skipped. The stream is closed when this method returns.
     *
     * <p>Entries are collected with an {@link ImmutableMap.Builder}; per the
     * Guava documentation, a term that occurs more than once makes
     * {@code build()} fail with an {@link IllegalArgumentException}.
     *
     * @param is the stream to read from
     * @return an immutable map from term to value
     * @throws IOException if reading fails or a line has no value column
     * @throws NumberFormatException if a value cannot be parsed as a double
     */
    public static Map<String, Double> readIdfDictionary(InputStream is) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(is, StandardCharsets.UTF_8), IDF_READER_BUFFER_SIZE)) {
            ImmutableMap.Builder<String, Double> builder = ImmutableMap.builder();
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                ++lineNumber;
                if (line.trim().isEmpty()) {
                    // Tolerate empty or whitespace-only lines (e.g. a trailing blank line).
                    continue;
                }
                String[] parts = IDF_COLUMN_SEPARATOR.split(line, 2);
                if (parts.length < 2) {
                    // Report the offending line instead of failing with an array index error.
                    throw new IOException("Malformed IDF dictionary entry at line " + lineNumber
                            + ": '" + line + "'");
                }
                builder.put(parts[0], Double.valueOf(parts[1]));
            }
            return builder.build();
        }
    }

    /**
     * Creates a model for {@code locale} from the analyzer for that locale and
     * the IDF dictionary read from {@code idfIs}.
     *
     * @param locale the language of the model
     * @param idfIs stream containing the IDF dictionary; see {@link #readIdfDictionary(InputStream)}
     * @return the new model
     * @throws IOException if the dictionary cannot be read
     * @throws IllegalArgumentException if no analyzer exists for {@code locale}
     */
    public static LanguageModel readLanguageModel(Locale locale, InputStream idfIs) throws IOException {
        return new LanguageModel(LanguageModels.getAnalyzerForLanguage(locale), LanguageModels.readIdfDictionary(idfIs));
    }
}

