/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.termextraction;

import java.util.Map;
import net.sf.okapi.common.FileLocation;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnit;
import net.sf.okapi.steps.termextraction.Parameters;
import net.sf.okapi.steps.termextraction.SimpleTermExtractor;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * JUnit 4 tests for {@link SimpleTermExtractor}.
 *
 * <p>Each test feeds one text unit to the extractor using English as the
 * source locale and compares the string form of the resulting term map
 * against a fixed expected value.
 */
@RunWith(JUnit4.class)
public class SimpleTermExtractorTest {
    private final SimpleTermExtractor extractor = new SimpleTermExtractor();
    private Parameters params;

    /**
     * Creates a fresh {@link Parameters} instance before every test and sets
     * its output path to {@code /terms.txt} resolved through a
     * {@link FileLocation} built from this test class.
     */
    @Before
    public void setUp() {
        params = new Parameters();
        FileLocation root = FileLocation.fromClass(getClass());
        params.setOutputPath(root.out("/terms.txt").toString());
    }

    /**
     * Runs the extractor with the parameters from {@link #setUp()} on one
     * short sentence and expects the term map to print as {@code {test=2}}.
     */
    @Test
    public void testSimpleCase() {
        ITextUnit unit = new TextUnit("id", "This is a test, a rather simple test.");
        Map<?, ?> terms = extractTerms(unit);
        Assert.assertEquals("{test=2}", terms.toString());
    }

    /**
     * Sets the minimum-occurrences parameter to 3, runs the extractor on the
     * longer text from {@link #createLongTU()}, and checks the string form of
     * the resulting term map.
     */
    @Test
    public void testLongTextCaseWithMinOcc3() {
        // Must be set before the extractor is initialized with the parameters.
        params.setMinOccurrences(3);
        Map<?, ?> terms = extractTerms(createLongTU());
        Assert.assertEquals("{complex=4, complex expression=3, expression=3}", terms.toString());
    }

    /**
     * Drives one complete extraction cycle: initialize with the current
     * parameters and the English locale, process the given unit, complete the
     * extraction, and fetch the terms.
     *
     * @param unit the text unit to process
     * @return the map returned by {@link SimpleTermExtractor#getTerms()}; a
     *         wildcard type is used because only its string form is checked
     */
    private Map<?, ?> extractTerms(ITextUnit unit) {
        extractor.initialize(params, LocaleId.ENGLISH, null, null);
        extractor.processTextUnit(unit);
        extractor.completeExtraction();
        return extractor.getTerms();
    }

    /**
     * Builds a text unit with id {@code "id"} whose source content is a
     * multi-sentence paragraph that repeats "complex expression" and also
     * contains bracketed text and symbol-only tokens.
     *
     * @return the populated text unit
     */
    private ITextUnit createLongTU() {
        TextUnit unit = new TextUnit("id");
        unit.setSourceContent(new TextFragment("This is a test with a complex expression. A complex expression that occurs often. This is important for this test. A complex term like [complex expression] is also a term with several words. Things like $#@ or & should not be seen as words."));
        return unit;
    }
}

