package org.apache.joshua.decoder.ff.lm;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.util.Regex;
import org.apache.joshua.util.io.LineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:joshua-incubating-6.1.jar:org/apache/joshua/decoder/ff/lm/ArpaFile.class */
/**
 * Iterable view over the n-gram entries of an ARPA-format language model file.
 *
 * <p>Each call to {@link #iterator()} re-opens the underlying file, skips everything up to and
 * including the first n-gram section header, and then yields one {@link ArpaNgram} per
 * non-blank data line. Files whose name ends in {@code "gz"} are read through a
 * {@link GZIPInputStream} by {@link #iterator()}.
 */
public class ArpaFile implements Iterable<ArpaNgram> {
    private static final Logger LOG = LoggerFactory.getLogger(ArpaFile.class);

    /** Matches a line that is empty or contains only whitespace. */
    public static final Regex BLANK_LINE = new Regex("^\\s*$");

    /**
     * Matches an n-gram section header such as {@code \1-grams:}. Accepts multi-digit orders so
     * that it agrees with the {@code \d+} used when parsing the count lines in
     * {@link #getOrder()}.
     */
    public static final Regex NGRAM_HEADER = new Regex("^\\\\\\d+-grams:\\s*$");

    /**
     * Matches the {@code \end\} marker, optionally followed by whitespace. (The previous pattern
     * lacked one escape level and matched literal {@code s} characters instead of whitespace.)
     */
    public static final Regex NGRAM_END = new Regex("^\\\\end\\\\\\s*$");

    /** Matches a count line such as {@code ngram 3=12345}; group 1 is the order. */
    private static final Pattern NGRAM_COUNT_LINE = Pattern.compile("^ngram (\\d+)=\\d+$");

    private final File arpaFile;
    private final Vocabulary vocab;

    /**
     * Creates a view over the given file using a caller-supplied vocabulary. The file is not
     * opened until it is iterated or queried.
     *
     * @param str path of the ARPA file
     * @param vocabulary vocabulary returned by {@link #getVocab()}
     */
    public ArpaFile(String str, Vocabulary vocabulary) {
        this.arpaFile = new File(str);
        this.vocab = vocabulary;
    }

    /**
     * Creates a view over the given file with a fresh {@link Vocabulary}, and makes one pass over
     * the file registering words through the static {@code Vocabulary.addAll}.
     *
     * <p>For every line that splits into more than one whitespace-separated field, the second
     * field is added to the vocabulary; lines with a single field are logged at info level.
     * NOTE(review): only the second field of each line is registered here — confirm this is
     * the intended behavior for n-grams of order greater than one.
     *
     * @param str path of the ARPA file
     * @throws IOException if the file cannot be read
     */
    public ArpaFile(String str) throws IOException {
        this.arpaFile = new File(str);
        this.vocab = new Vocabulary();
        LineReader lineReader = new LineReader(str);
        try {
            Iterator<String> lines = lineReader.iterator();
            while (lines.hasNext()) {
                String line = lines.next();
                String[] fields = Regex.spaces.split(line);
                if (fields.length > 1) {
                    for (String word : Regex.spaces.split(fields[1])) {
                        LOG.debug("Adding to vocab: {}", word);
                        Vocabulary.addAll(word);
                    }
                } else {
                    LOG.info(line);
                }
            }
            LOG.info("Done constructing ArpaFile");
        } finally {
            lineReader.close();
        }
    }

    /**
     * Returns the vocabulary associated with this file.
     *
     * @return the vocabulary passed to, or created by, the constructor
     */
    public Vocabulary getVocab() {
        return this.vocab;
    }

    /**
     * Counts the n-gram entries by iterating over the whole file.
     *
     * @return the number of entries produced by {@link #iterator()}
     */
    public int size() {
        LOG.debug("Counting n-grams in ARPA file");
        int count = 0;
        Iterator<ArpaNgram> it = iterator();
        while (it.hasNext()) {
            it.next();
            count++;
        }
        LOG.debug("Done counting n-grams in ARPA file");
        return count;
    }

    /**
     * Reads the preamble of the file and returns the order announced by the last
     * {@code ngram N=count} line that appears before the first n-gram section header.
     *
     * <p>NOTE(review): unlike {@link #iterator()}, this method reads the file as plain text even
     * when its name ends in {@code "gz"}.
     *
     * @return the order from the last matching count line, or 0 if no count line was found
     * @throws FileNotFoundException if the file cannot be opened
     */
    public int getOrder() throws FileNotFoundException {
        LOG.debug("Pattern is {}", NGRAM_COUNT_LINE);
        int order = 0;
        // try-with-resources so the file handle is released on every exit path.
        try (Scanner scanner = new Scanner(this.arpaFile)) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (NGRAM_HEADER.matches(line)) {
                    // The count lines live in the preamble; stop at the first data section.
                    break;
                }
                Matcher matcher = NGRAM_COUNT_LINE.matcher(line);
                if (matcher.matches()) {
                    LOG.debug("DOES  match: '{}'", line);
                    order = Integer.parseInt(matcher.group(1));
                } else {
                    LOG.debug("Doesn't match: '{}'", line);
                }
            }
        }
        return order;
    }

    /**
     * Opens the file and returns an iterator over its n-gram entries.
     *
     * <p>The returned iterator closes the underlying file once it has been exhausted. An
     * iterator that is abandoned before exhaustion leaves the file open.
     *
     * @return an iterator over the entries, or {@code null} if the file could not be opened (the
     *     failure is logged at error level)
     */
    @Override
    public Iterator<ArpaNgram> iterator() {
        final Scanner scanner;
        try {
            scanner = openScanner();
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
            return null;
        }

        // Discard the preamble up to and including the first section header.
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            LOG.debug("Discarding line: {}", line);
            if (NGRAM_HEADER.matches(line)) {
                break;
            }
        }

        return new Iterator<ArpaNgram>() {
            /** Trimmed data line waiting to be returned by next(), or null if none is buffered. */
            private String nextLine = null;
            /** Order of the section currently being read; bumped at each header or end marker. */
            private int ngramOrder = 1;
            /** Set once the scanner has been drained and closed. */
            private boolean exhausted = false;

            /**
             * Buffers the next data line if one is not already buffered. Safe to call
             * repeatedly: a buffered line is not discarded by further calls.
             */
            @Override
            public boolean hasNext() {
                if (this.nextLine != null) {
                    return true;
                }
                if (this.exhausted) {
                    return false;
                }
                while (scanner.hasNextLine()) {
                    String line = scanner.nextLine().trim();
                    if (NGRAM_HEADER.matches(line) || NGRAM_END.matches(line)) {
                        this.ngramOrder++;
                    } else if (!BLANK_LINE.matches(line)) {
                        this.nextLine = line;
                        return true;
                    }
                }
                this.exhausted = true;
                scanner.close();
                return false;
            }

            /**
             * Parses the buffered line as: log-probability, {@code ngramOrder} words, and an
             * optional trailing backoff weight (0 when absent). The last word becomes the
             * n-gram's word id and the preceding words its context.
             *
             * @throws NoSuchElementException if no data lines remain
             * @throws ArrayIndexOutOfBoundsException if the line has fewer fields than the
             *     current order requires
             * @throws NumberFormatException if a numeric field cannot be parsed
             */
            @Override
            public ArpaNgram next() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                String[] fields = Regex.spaces.split(this.nextLine);
                // Consume the line before parsing so a malformed line cannot be returned twice.
                this.nextLine = null;

                float logProb = Float.parseFloat(fields[0]);
                int word = Vocabulary.id(fields[this.ngramOrder]);
                int[] context = new int[this.ngramOrder - 1];
                for (int i = 1; i < this.ngramOrder; i++) {
                    context[i - 1] = Vocabulary.id(fields[i]);
                }
                float backoff = fields.length > this.ngramOrder + 1
                        ? Float.parseFloat(fields[fields.length - 1])
                        : 0.0f;
                return new ArpaNgram(word, context, logProb, backoff);
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }

    /**
     * Opens a scanner over the file, decompressing it when the file name ends in {@code "gz"}.
     * If wrapping the raw stream fails, the raw stream is closed before the error propagates.
     */
    private Scanner openScanner() throws IOException {
        if (!this.arpaFile.getName().endsWith("gz")) {
            return new Scanner(this.arpaFile);
        }
        FileInputStream raw = new FileInputStream(this.arpaFile);
        try {
            return new Scanner(new GZIPInputStream(raw));
        } catch (IOException e) {
            try {
                raw.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}
