/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.tokenization;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.ListUtil;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.annotation.IAnnotation;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.StartDocument;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.steps.tokenization.ITokenizer;
import net.sf.okapi.steps.tokenization.Parameters;
import net.sf.okapi.steps.tokenization.RbbiTokenizer;
import net.sf.okapi.steps.tokenization.Token;
import net.sf.okapi.steps.tokenization.Tokens;
import net.sf.okapi.steps.tokenization.TokensAnnotation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Pipeline step that tokenizes the source and/or target content of text units
 * and attaches the resulting tokens as {@link TokensAnnotation}s.
 *
 * <p>Which sides are tokenized, and which token names are kept, is driven by
 * the step's {@link Parameters}. For French and Italian, tokens containing an
 * apostrophe are additionally split around that apostrophe.
 */
@UsingParameters(value=Parameters.class)
public class TokenizationStep
extends BasePipelineStep {
    /** Matches a typographic (U+2019) or a straight apostrophe. */
    private static final Pattern APOSTROPHE = Pattern.compile("[\u2019']");
    private final Logger logger = LoggerFactory.getLogger(getClass());
    private final ITokenizer tokenizer;
    /** Scratch list filled by {@code TextUnitUtil.getText} and consumed by {@code Tokens.fixRanges}. */
    private final ArrayList<Integer> positions;
    private final Parameters params = new Parameters();
    private LocaleId targetLocale;
    private LocaleId sourceLocale;

    /**
     * Creates the step with default parameters and an {@link RbbiTokenizer}.
     */
    public TokenizationStep() {
        this.setParameters(this.params);
        this.tokenizer = new RbbiTokenizer();
        this.positions = new ArrayList<>();
    }

    /**
     * Picks up the source locale from the start-document resource, when one is present.
     *
     * @param event the start-document event
     * @return the same event, unmodified
     */
    protected Event handleStartDocument(Event event) {
        StartDocument sd = (StartDocument)event.getResource();
        if (sd != null) {
            this.sourceLocale = sd.getLocale();
        }
        return event;
    }

    /**
     * Tokenizes the text unit carried by the event, according to the step parameters.
     * Missing, empty and non-translatable text units are passed through untouched.
     *
     * @param event the text-unit event
     * @return the event to send down the pipeline, or {@code null} if the base class returned {@code null}
     */
    protected Event handleTextUnit(Event event) {
        event = super.handleTextUnit(event);
        if (event == null) {
            return null;
        }
        ITextUnit tu = event.getTextUnit();
        if (tu == null || tu.isEmpty() || !tu.isTranslatable()) {
            return event;
        }
        if (this.params.isTokenizeSource()) {
            this.tokenizeSource(tu);
        }
        if (this.params.isTokenizeTargets()) {
            this.tokenizeTargets(tu);
        }
        return event;
    }

    /**
     * @return the locale used when tokenizing source content
     */
    public LocaleId getSourceLocale() {
        return this.sourceLocale;
    }

    /**
     * @param sourceLocale the locale to use when tokenizing source content
     */
    @StepParameterMapping(parameterType=StepParameterType.SOURCE_LOCALE)
    public void setSourceLocale(LocaleId sourceLocale) {
        this.sourceLocale = sourceLocale;
    }

    /**
     * @return the target locale set on this step
     */
    public LocaleId getTargetLocale() {
        return this.targetLocale;
    }

    /**
     * @param targetLocale the target locale for this step
     */
    @StepParameterMapping(parameterType=StepParameterType.TARGET_LOCALE)
    public void setTargetLocale(LocaleId targetLocale) {
        this.targetLocale = targetLocale;
    }

    /**
     * Tokenizes the text of a container in the given language.
     *
     * @param tc the container to tokenize
     * @param language the language passed to the tokenizer
     * @return the tokens, filtered by the token names configured in the parameters,
     *         or {@code null} when the container is {@code null} or the language is null/empty
     */
    private Tokens tokenize(TextContainer tc, LocaleId language) {
        if (tc == null || Util.isNullOrEmpty(language)) {
            return null;
        }
        this.positions.clear();
        // Single-segment content is read directly; otherwise an unsegmented copy is used.
        TextFragment content = tc.contentIsOneSegment() ? tc.getFirstContent() : tc.getUnSegmentedContentCopy();
        String text = TextUnitUtil.getText(content, this.positions);

        Tokens tokens = new Tokens();
        this.tokenizer.init(text, language);
        while (this.tokenizer.hasNext()) {
            Token t = this.tokenizer.next();
            if (t != null) {
                tokens.addAll(this.postProcess(t, language));
            }
        }
        // Adjust token ranges using the positions collected while extracting the text.
        tokens.fixRanges(this.positions);
        return tokens.getFilteredList(ListUtil.stringListAsArray(this.params.getIncludedTokenNames()));
    }

    /**
     * Applies language-specific post-processing to one token.
     *
     * <p>For French and Italian, a token containing an apostrophe is handed to
     * {@link #apostrophe(Token, LocaleId)}; every other token is returned as-is.
     *
     * @param t the token produced by the tokenizer
     * @param language the language of the text being tokenized
     * @return the token(s) that replace {@code t}
     */
    public Collection<? extends Token> postProcess(Token t, LocaleId language) {
        boolean splitsOnApostrophe = LocaleId.FRENCH.sameLanguageAs(language) || LocaleId.ITALIAN.sameLanguageAs(language);
        if (splitsOnApostrophe && APOSTROPHE.matcher(t.getValue()).find()) {
            return this.apostrophe(t, language);
        }
        List<Token> tokens = new ArrayList<>();
        tokens.add(t);
        return tokens;
    }

    /**
     * Splits a token around its first apostrophe into three tokens: the part before,
     * the apostrophe itself (as a {@code PUNCTUATION} token), and the part after.
     *
     * <p>If the token cannot be split into two parts (no apostrophe, or nothing after
     * the apostrophe, e.g. {@code l'}), the original token is returned alone instead
     * of failing.
     *
     * @param token the token to split
     * @param locale the language of the token (not used by the split itself)
     * @return the three resulting tokens, or a one-element list holding {@code token}
     *         when no split is possible
     */
    public List<Token> apostrophe(Token token, LocaleId locale) {
        String original = token.getValue();
        List<Token> tokens = new ArrayList<>();

        Matcher matcher = APOSTROPHE.matcher(original);
        String[] words = APOSTROPHE.split(original);
        // Pattern.split drops trailing empty strings, so fewer than two parts means there
        // is no text after the apostrophe (or no apostrophe at all): leave the token whole.
        if (!matcher.find() || words.length < 2) {
            tokens.add(token);
            return tokens;
        }

        int start = token.getRange().start;
        int end = token.getRange().end;
        int wordId = token.getId();
        String wordName = Tokens.getTokenName(wordId);
        String wordDescription = Tokens.getTokenDescription(wordId);

        // Part before the apostrophe.
        String head = words[0];
        int word1End = start + head.length();
        tokens.add(new Token(wordId, head, wordName, wordDescription, start, word1End));

        // The apostrophe itself.
        // NOTE(review): the +1/+2/+3 offsets below are kept exactly as in the original;
        // confirm them against the Range start/end semantics.
        String punctuationName = "PUNCTUATION";
        int punctuationId = Tokens.getTokenId(punctuationName);
        tokens.add(new Token(punctuationId, matcher.group(), punctuationName,
                Tokens.getTokenDescription(punctuationId), word1End + 1, word1End + 2));

        // Part after the apostrophe.
        tokens.add(new Token(wordId, words[1], wordName, wordDescription, word1End + 3, end));
        return tokens;
    }

    /**
     * Tokenizes the source of a text unit and stores the tokens in its source
     * {@link TokensAnnotation}, creating the annotation if it does not exist yet.
     *
     * @param tu the text unit to process; ignored when {@code null}
     */
    private void tokenizeSource(ITextUnit tu) {
        if (tu == null) {
            return;
        }
        Tokens tokens = this.tokenize(tu.getSource(), this.getSourceLocale());
        if (tokens == null) {
            return;
        }
        TokensAnnotation ta = (TokensAnnotation)TextUnitUtil.getSourceAnnotation(tu, TokensAnnotation.class);
        if (ta == null) {
            TextUnitUtil.setSourceAnnotation(tu, new TokensAnnotation(tokens));
        } else {
            ta.addTokens(tokens);
        }
    }

    /**
     * Tokenizes every target of a text unit, each in its own locale, and stores the
     * tokens in the corresponding target {@link TokensAnnotation}, creating it if needed.
     *
     * @param tu the text unit to process; ignored when {@code null}
     */
    private void tokenizeTargets(ITextUnit tu) {
        if (tu == null) {
            return;
        }
        for (LocaleId language : tu.getTargetLocales()) {
            Tokens tokens = this.tokenize(tu.getTarget(language), language);
            if (tokens == null) {
                continue;
            }
            TokensAnnotation ta = (TokensAnnotation)TextUnitUtil.getTargetAnnotation(tu, language, TokensAnnotation.class);
            if (ta == null) {
                TextUnitUtil.setTargetAnnotation(tu, language, new TokensAnnotation(tokens));
            } else {
                ta.addTokens(tokens);
            }
        }
    }

    /**
     * @return the display name of this step
     */
    public String getName() {
        return "Tokenization Step";
    }

    /**
     * @return a short description of what this step does
     */
    public String getDescription() {
        return "Extracts tokens from the text units content of a document. Expects: filter events. Sends back: filter events.";
    }
}

