/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.lib.segmentation;

import java.util.ArrayList;
import net.sf.okapi.common.ISegmenter;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.lib.segmentation.LanguageMap;
import net.sf.okapi.lib.segmentation.Rule;
import net.sf.okapi.lib.segmentation.SRXDocument;
import net.sf.okapi.lib.segmentation.SRXSegmenter;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests segmentation performed by an {@link SRXSegmenter} whose rules are compiled from an
 * {@link SRXDocument} that has ICU4J break rules switched on.
 *
 * <p>Every test goes through {@code testBreak}, which installs at most one custom rule,
 * compiles the rules for English, segments the input text and checks the first two segments.
 */
@RunWith(value=JUnit4.class)
public class TestIcu4jRules {
    /** Name under which the rule list is registered and which the language map points to. */
    private static final String RULE_NAME = "default";

    /** Language-map pattern used by every test; as a regular expression it matches any input. */
    private static final String ANY_LANGUAGE = ".*";

    /** Segment count expected by the five-argument {@code testBreak} overload. */
    private static final int DEFAULT_SEGMENT_COUNT = 2;

    private SRXSegmenter segmenter;
    private SRXDocument doc;
    private ArrayList<Rule> rules;

    /**
     * Creates a fresh document (with ICU4J break rules enabled), segmenter and empty rule
     * list before each test.
     */
    @Before
    public void startUp() {
        this.doc = new SRXDocument();
        this.doc.setUseICU4JBreakRules(true);
        this.segmenter = new SRXSegmenter();
        // Typed instantiation instead of the raw ArrayList emitted by the decompiler.
        this.rules = new ArrayList<Rule>();
    }

    /**
     * Checks two-sentence inputs separated by a space, {@code <br>} or {@code <br/>}, both with
     * an explicit custom rule and (second case) with no custom rule at all.
     */
    @Test
    public void testMetachars() {
        this.testBreak("Sentence 1. Sentence 2.", "\\.", "\\s|<br/?>", "Sentence 1.", " Sentence 2.");
        this.testBreak("Sentence 1. Sentence 2.", null, null, "Sentence 1.", " Sentence 2.");
        this.testBreak("Sentence 1.<br>Sentence 2.", "\\.", "\\s|<br/?>", "Sentence 1.", "<br>Sentence 2.");
        this.testBreak("Sentence 1.<br/>Sentence 2.", "\\.", "\\s|<br/?>", "Sentence 1.", "<br/>Sentence 2.");
    }

    /**
     * Checks longer inputs containing abbreviations and quoted text, with no custom rule
     * installed. The first input is expected to yield five segments, the second two.
     */
    @Test
    public void testMetachars2() {
        this.testBreak("Mr. Holmes is from the U.K. not the U.S. Is Dr. Watson from there too? Yes: both are.", null, null, "Mr.", " Holmes is from the U.K. not the U.S.", 5);
        this.testBreak("The First Darlek Empire has written: \"The simplest statement we know of is the statement of Davross himself, namely, that the members of the empire should destroy 'all life forms,' which is understood to mean universal destruction. No one is justified in making any other statement than this\" (First Darlek Empire letter, Mar. 12, 3035; see also DE 11:4).", null, null, "The First Darlek Empire has written: \"The simplest statement we know of is the statement of Davross himself, namely, that the members of the empire should destroy 'all life forms,' which is understood to mean universal destruction.", " No one is justified in making any other statement than this\" (First Darlek Empire letter, Mar. 12, 3035; see also DE 11:4).");
    }

    /**
     * Segments {@code text} and asserts that exactly two segments are produced and that they
     * equal {@code beforeBreak} and {@code afterBreak}.
     *
     * @param text the text to segment
     * @param bbr before-break pattern of the custom rule, or {@code null} for no custom rule
     * @param abr after-break pattern of the custom rule, or {@code null} for no custom rule
     * @param beforeBreak expected content of the first segment
     * @param afterBreak expected content of the second segment
     */
    private void testBreak(String text, String bbr, String abr, String beforeBreak, String afterBreak) {
        this.testBreak(text, bbr, abr, beforeBreak, afterBreak, DEFAULT_SEGMENT_COUNT);
    }

    /**
     * Segments {@code text} and asserts both the total number of segments and the content of
     * the first two segments.
     *
     * <p>A custom rule is installed only when both {@code bbr} and {@code abr} are non-null;
     * otherwise the rule list registered with the document is empty.
     *
     * @param text the text to segment
     * @param bbr before-break pattern of the custom rule, or {@code null} for no custom rule
     * @param abr after-break pattern of the custom rule, or {@code null} for no custom rule
     * @param beforeBreak expected content of the first segment
     * @param afterBreak expected content of the second segment
     * @param numSeg expected number of segments reported by the segmenter
     */
    private void testBreak(String text, String bbr, String abr, String beforeBreak, String afterBreak, int numSeg) {
        // Start from an empty rule list so rules from a previous call do not leak into this one.
        this.rules.clear();
        if (bbr != null && abr != null) {
            // NOTE(review): the third argument presumably marks this as a breaking rule - confirm against Rule.
            this.rules.add(new Rule(bbr, abr, true));
        }
        this.doc.addLanguageRule(RULE_NAME, this.rules);
        this.doc.addLanguageMap(new LanguageMap(ANY_LANGUAGE, RULE_NAME));

        // NOTE(review): clearing the language presumably forces the rules to be recompiled
        // rather than reused from the previous call - confirm against SRXDocument.
        this.segmenter.setLanguage(null);
        this.doc.compileLanguageRules(LocaleId.ENGLISH, this.segmenter);

        Assert.assertEquals(numSeg, this.segmenter.computeSegments(text));

        TextContainer tc = new TextContainer(text);
        tc.getSegments().create(this.segmenter.getRanges());
        Assert.assertEquals(beforeBreak, tc.getSegments().get(0).toString());
        Assert.assertEquals(afterBreak, tc.getSegments().get(1).toString());
    }
}

