package org.apache.joshua.decoder.segment_file;

import cern.colt.matrix.impl.AbstractFormatter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.LanguageModelStateManager;
import org.apache.joshua.decoder.ff.lm.ArpaNgram;
import org.apache.joshua.decoder.ff.tm.Grammar;
import org.apache.joshua.lattice.Arc;
import org.apache.joshua.lattice.Lattice;
import org.apache.joshua.lattice.Node;
import org.apache.joshua.util.ChartSpan;
import org.apache.joshua.util.Constants;
import org.apache.joshua.util.FormatUtils;
import org.apache.joshua.util.Regex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:joshua-incubating-6.1.jar:org/apache/joshua/decoder/segment_file/Sentence.class */
/**
 * One input segment handed to the decoder.
 *
 * <p>The constructor accepts three input shapes: an SGML-style
 * {@code <seg id="N">...</seg>} line (the embedded id overrides the one passed in), a
 * {@code source ||| target ||| reference...} line, or a bare source string. The source is
 * converted lazily into a {@link Lattice} of {@link Token}s by {@link #getLattice()}.
 *
 * <p>Instances are mutable ({@link #segmentOOVs(Grammar[])} and
 * {@link #adjustForLength(int)} change the source/lattice) and nothing in this class
 * synchronizes access.
 */
public class Sentence {
    private static final Logger LOG = LoggerFactory.getLogger(Sentence.class);

    /** Opening {@code <seg id="N" ...>} tag at the start of the input; group 1 is the numeric id. */
    protected static final Pattern SEG_START = Pattern.compile("^\\s*<seg\\s+id=\"?(\\d+)\"?[^>]*>\\s*");

    /** Closing {@code </seg>} tag at the end of the input. */
    protected static final Pattern SEG_END = Pattern.compile("\\s*</seg\\s*>\\s*$");

    /** Compiled once so {@link #isEmpty()} does not recompile the expression on every call. */
    private static final Pattern BLANK = Pattern.compile("^\\s*$");

    /** Delimiter between source, target and references in the input line. */
    private static final String FIELD_DELIMITER = " ||| ";

    /** Prefix that marks the source as a PLF-encoded lattice rather than a plain string. */
    private static final String PLF_PREFIX = "(((";

    // NOTE(review): the word separator comes from a colt constant (presumably a single
    // space); kept as-is so runtime strings are unchanged -- confirm and inline if so.
    private static final String WORD_SEPARATOR = AbstractFormatter.DEFAULT_COLUMN_SEPARATOR;

    /** Segment id; -1 until the constructor assigns it. */
    public int id = -1;

    /** Raw source side (either plain words or a PLF lattice string). */
    protected String source = null;

    /** Target side, or null when none was supplied (or it was empty). */
    protected String target = null;

    /** Additional {@code |||}-separated fields after the target, or null when there are none. */
    protected String[] references = null;

    private final List<ConstraintSpan> constraints = new LinkedList<ConstraintSpan>();

    public JoshuaConfiguration config = null;

    /** Lazily built cache for {@link #fullSource()}. */
    protected String fullSource = null;

    /** Lazily built cache for {@link #fullTarget()}. */
    protected String fullTarget = null;

    /** Lazily built lattice over the source; reset to null whenever the source changes. */
    protected Lattice<Token> sourceLattice = null;

    private final LanguageModelStateManager stateManager = new LanguageModelStateManager();

    /**
     * Parses one input line.
     *
     * @param str raw input line; whitespace is normalised via {@code Regex.spaces} and trimmed
     * @param i id to use when the line carries no {@code <seg id=...>} tag
     * @param joshuaConfiguration decoder configuration ({@code lattice_decoding} and
     *     {@code maxlen} are read here)
     * @throws NumberFormatException if a {@code <seg>} id does not fit in an {@code int}
     */
    public Sentence(String str, int i, JoshuaConfiguration joshuaConfiguration) {
        this.config = joshuaConfiguration;
        String input = Regex.spaces.replaceAll(str, WORD_SEPARATOR).trim();

        Matcher segStart = SEG_START.matcher(input);
        if (segStart.find()) {
            // Read the group before replaceFirst(), which resets and re-runs the matcher.
            String idText = segStart.group(1);
            this.source = SEG_END.matcher(segStart.replaceFirst("")).replaceFirst("");
            this.id = Integer.parseInt(idText);
        } else {
            this.id = i;
            if (input.contains(FIELD_DELIMITER)) {
                // String.split drops trailing empty strings, so e.g. "a ||| |||" yields a
                // single element; guard every index instead of assuming two fields.
                String[] fields = input.split("\\s?\\|{3}\\s?");
                this.source = fields.length > 0 ? fields[0] : "";
                if (fields.length > 1 && !fields[1].isEmpty()) {
                    this.target = fields[1];
                }
                if (fields.length > 2) {
                    this.references = new String[fields.length - 2];
                    System.arraycopy(fields, 2, this.references, 0, fields.length - 2);
                }
            } else {
                this.source = input;
            }
        }

        // PLF lattices are never truncated; only plain strings are length-limited.
        boolean plfInput = joshuaConfiguration.lattice_decoding && this.source.startsWith(PLF_PREFIX);
        if (!plfInput) {
            adjustForLength(joshuaConfiguration.maxlen);
        }
    }

    /** Returns true when the source lattice has exactly one path. */
    public boolean isLinearChain() {
        return !getLattice().hasMoreThanOnePath();
    }

    /** Returns the shortest-path distance through the source lattice. */
    public int length() {
        return getLattice().getShortestDistance();
    }

    /**
     * Looks up an annotation on the token at the given position.
     *
     * @param i index into {@link #getTokens()}
     * @param str annotation key
     */
    public String getAnnotation(int i, String str) {
        return getTokens().get(i).getAnnotation(str);
    }

    /**
     * Replaces out-of-vocabulary words with a sub-lattice of in-vocabulary sub-words.
     *
     * <p>A word is considered for segmentation when its lattice node has a single outgoing
     * arc and the arc's word id is not a terminal of any supplied grammar. If the word can
     * be fully covered by known sub-words, the covering nodes are inserted into the
     * lattice; otherwise the node's arcs are restored unchanged.
     *
     * @param grammarArr grammars whose terminal symbols form the known vocabulary
     */
    public void segmentOOVs(Grammar[] grammarArr) {
        HashSet<Integer> vocabulary = collectTerminals(grammarArr);
        List<Node<Token>> latticeNodes = getLattice().getNodes();

        // Walk right-to-left so an insertion does not shift positions still to be visited.
        // The loop bounds skip node 0 and the last two nodes.
        for (int nodeId = latticeNodes.size() - 3; nodeId >= 1; nodeId--) {
            List<Arc<Token>> outgoing = latticeNodes.get(nodeId).getOutgoingArcs();
            if (outgoing.size() != 1) {
                continue;
            }
            Token label = outgoing.get(0).getLabel();
            // Compare the word id, not the Token object: the vocabulary holds Integer ids.
            if (vocabulary.contains(label.getWord())) {
                continue;
            }
            segmentWord(Vocabulary.word(label.getWord()), nodeId, latticeNodes, vocabulary);
        }
    }

    /** Gathers the terminal symbol ids reachable from the trie root of every grammar. */
    private static HashSet<Integer> collectTerminals(Grammar[] grammars) {
        HashSet<Integer> terminals = new HashSet<Integer>();
        for (Grammar grammar : grammars) {
            Iterator<Integer> extensions = grammar.getTrieRoot().getTerminalExtensionIterator();
            while (extensions.hasNext()) {
                terminals.add(extensions.next());
            }
        }
        return terminals;
    }

    /**
     * Tries to split one word (the single arc leaving {@code latticeNodes.get(nodeId)}) into
     * known sub-words and, on success, splices the resulting nodes into the lattice.
     */
    private void segmentWord(String word, int nodeId, List<Node<Token>> latticeNodes,
            HashSet<Integer> vocabulary) {
        Node<Token> start = latticeNodes.get(nodeId);
        // getOutgoingArcs() exposes the node's live list (it is mutated through that getter
        // further down), so take a copy to have something real to restore on failure.
        List<Arc<Token>> savedArcs = new ArrayList<Arc<Token>>(start.getOutgoingArcs());

        int length = word.length();
        // covered.get(a, b) == true means characters [a, b) can be tiled by known sub-words.
        ChartSpan<Boolean> covered = new ChartSpan<Boolean>(length + 1, false);

        // One node per character boundary: existing lattice nodes at both ends, fresh ones inside.
        ArrayList<Node<Token>> boundary = new ArrayList<Node<Token>>(length + 1);
        boundary.add(start);
        for (int k = 1; k < length; k++) {
            boundary.add(new Node<Token>(k));
        }
        boundary.add(latticeNodes.get(nodeId + 1));

        for (int width = 1; width <= length; width++) {
            for (int from = 0; from + width <= length; from++) {
                int to = from + width;
                // The full word is skipped here: only proper sub-words get new arcs.
                if (width != length) {
                    Token piece = new Token(word.substring(from, to), this.config);
                    // Test the sub-word's own id (the original tested the sentence id).
                    if (vocabulary.contains(piece.getWord())) {
                        boundary.get(from).addArc(boundary.get(to), ArpaNgram.DEFAULT_BACKOFF, piece);
                        covered.set(from, to, true);
                    }
                }
                for (int mid = from + 1; mid < to; mid++) {
                    if (covered.get(from, mid) && covered.get(mid, to)) {
                        covered.set(from, to, true);
                    }
                }
            }
        }

        if (!covered.get(0, length)) {
            // No complete segmentation: put the node back the way it was.
            start.setOutgoingArcs(savedArcs);
            return;
        }

        // Keep only interior nodes that lie on some complete start-to-end path, remembering
        // the dropped ones so that arcs pointing at them can be removed.
        HashSet<Node<Token>> dropped = new HashSet<Node<Token>>();
        ArrayList<Node<Token>> kept = new ArrayList<Node<Token>>(boundary.size());
        int last = boundary.size() - 1;
        kept.add(boundary.get(0));
        for (int k = 1; k < last; k++) {
            Node<Token> candidate = boundary.get(k);
            if (covered.get(0, k) && covered.get(k, length)) {
                kept.add(candidate);
            } else {
                dropped.add(candidate);
            }
        }
        kept.add(boundary.get(last));

        for (Node<Token> node : kept) {
            List<Arc<Token>> arcs = node.getOutgoingArcs();
            int arcIndex = 0;
            while (arcIndex != arcs.size()) {
                if (dropped.contains(arcs.get(arcIndex).getHead())) {
                    arcs.remove(arcIndex);
                } else {
                    arcIndex++;
                }
            }
        }

        getLattice().insert(nodeId, nodeId + 1, kept);
    }

    /**
     * Truncates the source to at most {@code i} whitespace-separated words when the lattice
     * is longer than that. Truncation discards the cached lattice and blanks any target.
     *
     * @param i maximum number of source words to keep
     */
    protected void adjustForLength(int i) {
        int size = getLattice().size() - 2;
        if (size <= i) {
            return;
        }
        LOG.warn("sentence {} too long {}, truncating to length {}", new Object[] {id(), size, i});

        String[] words = this.source.split(Constants.spaceSeparator);
        StringBuilder truncated = new StringBuilder(words[0]);
        // Bound by the array length as well, in case the lattice size and the split disagree.
        int limit = Math.min(i, words.length);
        for (int w = 1; w < limit; w++) {
            truncated.append(WORD_SEPARATOR).append(words[w]);
        }
        this.source = truncated.toString();

        // Everything derived from the old source is stale now.
        this.sourceLattice = null;
        this.fullSource = null;
        if (this.target != null) {
            this.target = "";
            this.fullTarget = null;
        }
    }

    /** Returns true when the raw source consists only of whitespace (or is empty). */
    public boolean isEmpty() {
        return BLANK.matcher(this.source).matches();
    }

    /** Returns the segment id. */
    public int id() {
        return this.id;
    }

    /** Returns the source exactly as stored (plain words or PLF), without lattice processing. */
    public String rawSource() {
        return this.source;
    }

    /**
     * Returns the source words rebuilt from the lattice tokens, skipping the first and last
     * token (for plain input these are the sentence markers {@link #getLattice()} adds).
     */
    public String source() {
        int[] wordIds = getWordIDs();
        StringBuilder words = new StringBuilder();
        for (int pos = 1; pos < wordIds.length - 1; pos++) {
            words.append(Vocabulary.word(wordIds[pos])).append(WORD_SEPARATOR);
        }
        return words.toString().trim();
    }

    /** Returns {@link #source()} wrapped by {@code FormatUtils.addSentenceMarkers}; cached. */
    public String fullSource() {
        if (this.fullSource == null) {
            this.fullSource = FormatUtils.addSentenceMarkers(source());
        }
        return this.fullSource;
    }

    /** Returns the target side, or null when none is set. */
    public String target() {
        return this.target;
    }

    /**
     * Returns the target wrapped by {@code FormatUtils.addSentenceMarkers}; cached.
     *
     * <p>NOTE(review): when {@link #target()} is null, null is passed to
     * {@code addSentenceMarkers}; what it returns in that case is not visible here.
     */
    public String fullTarget() {
        if (this.fullTarget == null) {
            this.fullTarget = FormatUtils.addSentenceMarkers(target());
        }
        return this.fullTarget;
    }

    /**
     * Returns the tokens of {@link #fullSource()} in the half-open range {@code [i, i2)},
     * joined by the word separator.
     *
     * @param i index of the first token to include
     * @param i2 index one past the last token to include
     */
    public String source(int i, int i2) {
        StringTokenizer tokens = new StringTokenizer(fullSource());
        StringBuilder span = new StringBuilder();
        for (int pos = 0; pos < i2 && tokens.hasMoreTokens(); pos++) {
            String token = tokens.nextToken();
            if (pos >= i) {
                span.append(token).append(WORD_SEPARATOR);
            }
        }
        return span.toString().trim();
    }

    /** Returns the reference strings, or null when the input had none. */
    public String[] references() {
        return this.references;
    }

    /**
     * Returns the label of the first outgoing arc of every lattice node that has one.
     * With assertions enabled, fails unless the lattice is a linear chain.
     */
    public List<Token> getTokens() {
        assert isLinearChain();
        List<Token> tokens = new ArrayList<Token>();
        for (Node<Token> node : getLattice().getNodes()) {
            if (node != null && node.getOutgoingArcs().size() > 0) {
                tokens.add(node.getOutgoingArcs().get(0).getLabel());
            }
        }
        return tokens;
    }

    /** Returns the word id of every token from {@link #getTokens()}, in order. */
    public int[] getWordIDs() {
        List<Token> tokens = getTokens();
        int[] ids = new int[tokens.size()];
        for (int pos = 0; pos < ids.length; pos++) {
            ids[pos] = tokens.get(pos).getWord();
        }
        return ids;
    }

    /**
     * Builds a string lattice from {@link #source()}.
     * With assertions enabled, fails unless the lattice is a linear chain.
     */
    public Lattice<String> stringLattice() {
        assert isLinearChain();
        return Lattice.createStringLatticeFromString(source(), this.config);
    }

    /** Returns the (live, mutable) list of constraint spans attached to this sentence. */
    public List<ConstraintSpan> constraints() {
        return this.constraints;
    }

    /**
     * Returns the source lattice, building it on first use. Plain input is wrapped as
     * {@code <s> source </s>}; input starting with {@code (((} is parsed as PLF when
     * lattice decoding is enabled.
     *
     * @throws RuntimeException if PLF input is combined with the "stack" search algorithm
     */
    public Lattice<Token> getLattice() {
        if (this.sourceLattice != null) {
            return this.sourceLattice;
        }
        boolean plfInput = this.config.lattice_decoding && rawSource().startsWith(PLF_PREFIX);
        if (plfInput) {
            if (this.config.search_algorithm.equals("stack")) {
                throw new RuntimeException("* FATAL: lattice decoding currently not supported for stack-based search algorithm.");
            }
            this.sourceLattice = Lattice.createTokenLatticeFromPLF(rawSource(), this.config);
        } else {
            String marked = String.format("%s %s %s", "<s>", rawSource(), "</s>");
            this.sourceLattice = Lattice.createTokenLatticeFromString(marked, this.config);
        }
        return this.sourceLattice;
    }

    /** Returns {@code source()} followed by {@code " ||| " + target()} when a target is set. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(source());
        String tgt = target();
        if (tgt != null) {
            text.append(FIELD_DELIMITER).append(tgt);
        }
        return text.toString();
    }

    /** Returns true when the lattice reports a distance other than -1 between the two nodes. */
    public boolean hasPath(int i, int i2) {
        return getLattice().distance(i, i2) != -1;
    }

    /** Returns the lattice node with the given index. */
    public Node<Token> getNode(int i) {
        return getLattice().getNode(i);
    }

    /** Returns the language-model state manager owned by this sentence. */
    public LanguageModelStateManager getStateManager() {
        return this.stateManager;
    }
}
