/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.tokenization;

import java.util.Collection;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.IResource;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Range;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.StartDocument;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.steps.tokenization.Token;
import net.sf.okapi.steps.tokenization.TokenizationStep;
import net.sf.okapi.steps.tokenization.Tokenizer;
import net.sf.okapi.steps.tokenization.Tokens;
import net.sf.okapi.steps.tokenization.TokensAnnotation;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests for {@link TokenizationStep} and the static {@link Tokenizer} entry point.
 *
 * <p>Most tests tokenize a fixed English input and assert the number of tokens
 * produced, plus the value or name of selected tokens.
 */
@RunWith(JUnit4.class)
public class TokenizationTest {
    /**
     * Shared sample sentence: mixes an emoji (surrogate pair), abbreviations,
     * currency amounts, dates, times, inline markup, e-mail addresses, URLs,
     * company names containing '&amp;' and an IP address.
     */
    private static final String TEXT =
            "Jaguar \ud83d\udc7d will sell its new XJ-6 model in the U.S. for a small fortune :-). "
            + "Expect to pay around USD 120ks ($120,000.00 on 05/30/2007 at 12.30PM). "
            + "Custom options can set you back another few 10,000 dollars. "
            + "For details, go to <a href=\"http://www.jaguar.com/sales\" alt=\"Click here\">Jaguar Sales</a> "
            + "or contact xj-6@jaguar.com. "
            + "See http://www.jaguar.com/sales, www.jaguar.com, AT&T, P&G, Johnson&Johnson, 192.168.0.5 for info 3.5pct.";

    private final LocaleId locENUS = LocaleId.fromString("en-us");

    /** Step under test; a fresh instance is created before every test by {@link #setUp()}. */
    private TokenizationStep ts;

    /**
     * Runs {@link #TEXT} through the step as a monolingual en-us document
     * (START_BATCH, START_DOCUMENT, TEXT_UNIT, END_BATCH) and collects the tokens
     * attached to the text unit's source.
     *
     * <p>Not called by any test in this class at present; kept as a helper.
     *
     * @return the tokens found in the source {@link TokensAnnotation}, or an empty
     *         list when the step attached no annotation
     */
    private Tokens tokenizeText() {
        Tokens collected = new Tokens();

        ts.handleEvent(new Event(EventType.START_BATCH));

        StartDocument startDoc = new StartDocument("tokenization");
        startDoc.setLocale(locENUS);
        startDoc.setMultilingual(false);
        ts.handleEvent(new Event(EventType.START_DOCUMENT, startDoc));

        ITextUnit tu = TextUnitUtil.buildGenericTU(TEXT);
        ts.handleEvent(new Event(EventType.TEXT_UNIT, tu));

        TokensAnnotation annotation =
                (TokensAnnotation) TextUnitUtil.getSourceAnnotation(tu, TokensAnnotation.class);
        if (annotation != null) {
            collected.addAll(annotation.getTokens());
        }

        ts.handleEvent(new Event(EventType.END_BATCH));
        return collected;
    }

    @Before
    public void setUp() {
        ts = new TokenizationStep();
    }

    /**
     * Smoke test: pushes a text unit through the step between START_BATCH and
     * END_BATCH. It makes no assertions, so it only fails if an event handler throws.
     */
    @Test
    public void testTS() {
        ITextUnit tu = TextUnitUtil.buildGenericTU(TEXT);
        Event textUnitEvent = new Event(EventType.TEXT_UNIT, tu);

        ts.handleEvent(new Event(EventType.START_BATCH));
        ts.handleEvent(textUnitEvent);
        ts.handleEvent(new Event(EventType.END_BATCH));
    }

    /** Tokenizes a short mixed sentence with no token-name filter and checks the token count. */
    @Test
    public void listTokenizerOutput() {
        Tokens tokens = Tokenizer.tokenize(
                "NASDAQ :-) hypen-word www.google.com is a U.S. stock 1.0006 100 exchange.", locENUS);
        Assert.assertEquals(22, tokens.size());
    }

    /**
     * Tokenizes {@link #TEXT} with no token-name filter and checks the total count
     * and the first three tokens; the emoji surrogate pair must come out as one token.
     */
    @Test
    public void testTokenizer1() {
        Tokens tokens = Tokenizer.tokenize(TEXT, locENUS);
        Assert.assertEquals(132, tokens.size());
        Assert.assertEquals("Jaguar", tokens.get(0).getValue());
        Assert.assertEquals(" ", tokens.get(1).getValue());
        Assert.assertEquals("\ud83d\udc7d", tokens.get(2).getValue());
    }

    /** With only "WORD" requested, three space-separated words yield exactly three tokens. */
    @Test
    public void testTokenizer2() {
        Tokens tokens = Tokenizer.tokenize("word word word", locENUS, "WORD");
        Assert.assertEquals(3, tokens.size());
        Assert.assertEquals("word", tokens.get(0).getValue());
        Assert.assertEquals("word", tokens.get(1).getValue());
        Assert.assertEquals("word", tokens.get(2).getValue());
    }

    /**
     * Six consecutive symbol characters (registered sign, trade mark, service mark,
     * pound, yen, euro) must each become their own token; the first is expected to be
     * named OTHER_SYMBOL and the last CURRENCY.
     */
    @Test
    public void testTokenizerNonWord() {
        String symbols = "\u00ae\u2122\u2120\u00a3\u00a5\u20ac";
        Tokens tokens = Tokenizer.tokenize(symbols, LocaleId.ENGLISH);
        Assert.assertEquals(6, tokens.size());
        Assert.assertEquals("OTHER_SYMBOL", tokens.get(0).getName());
        Assert.assertEquals("CURRENCY", tokens.get(5).getName());
    }

    /** With only "HYPHENATED_WORD" requested, a hyphen-joined word is returned as a single token. */
    @Test
    public void hyphenatedWords() {
        Tokens tokens = Tokenizer.tokenize("word-word-word", locENUS, "HYPHENATED_WORD");
        Assert.assertEquals(1, tokens.size());
        Assert.assertEquals("word-word-word", tokens.get(0).getValue());
    }

    /** Tokenizes a string of times, a date and a currency amount with no filter and checks the count. */
    @Test
    public void allTokens() {
        Tokens tokens = Tokenizer.tokenize("12:00pm 03/12/192 11:45 $300", locENUS);
        Assert.assertEquals(11, tokens.size());
    }

    /** Two distinct {@link Range} instances with the same bounds must be equal and share a hash code. */
    @Test
    public void testRange() {
        Range r1 = new Range(1, 5);
        Range r2 = new Range(1, 5);
        Assert.assertNotSame(r1, r2);
        Assert.assertEquals(r1, r2);
        Assert.assertEquals(r1.hashCode(), r2.hashCode());
        // NOTE(review): this only checks that the two String instances differ by identity;
        // it says nothing about their content. Confirm whether assertEquals was intended.
        Assert.assertNotSame(r1.toString(), r2.toString());
    }
}

