/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.lib.segmentation;

import com.tngtech.java.junit.dataprovider.DataProvider;
import com.tngtech.java.junit.dataprovider.DataProviderRunner;
import com.tngtech.java.junit.dataprovider.UseDataProvider;
import java.util.ArrayList;
import java.util.List;
import net.sf.okapi.common.FileLocation;
import net.sf.okapi.common.ISegmenter;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.filterwriter.GenericContent;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.ISegments;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.InlineAnnotation;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnit;
import net.sf.okapi.lib.segmentation.LanguageMap;
import net.sf.okapi.lib.segmentation.Rule;
import net.sf.okapi.lib.segmentation.SRXDocument;
import net.sf.okapi.lib.segmentation.SRXSegmenter;
import net.sf.okapi.lib.segmentation.SegmentationRuleException;
import org.hamcrest.CoreMatchers;
import org.hamcrest.Matcher;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;

/**
 * Tests for {@link SRXSegmenter}: option defaults and overrides, rule-driven
 * segmentation of plain and coded text, and the code-position helpers.
 *
 * <p>The class runs under {@link DataProviderRunner} so that
 * {@link #computeSegmentsWithTreatIsolatedCodesAsWhitespace} can be fed from a
 * data provider.
 */
@RunWith(DataProviderRunner.class)
public class SRXSegmenterTest {
    /** Formatter used to render fragments and segmented containers as strings for assertions. */
    private final GenericContent fmt = new GenericContent();

    /** Intentionally empty: every test builds its own segmenter and content. */
    @Before
    public void setUp() {
    }

    /** A freshly constructed segmenter must expose the expected default option values. */
    @Test
    public void testDefaultOptions() {
        SRXSegmenter seg = new SRXSegmenter();
        Assert.assertFalse(seg.cascade());
        Assert.assertTrue(seg.segmentSubFlows());
        Assert.assertFalse(seg.includeStartCodes());
        Assert.assertTrue(seg.includeEndCodes());
        Assert.assertFalse(seg.includeIsolatedCodes());
        Assert.assertFalse(seg.oneSegmentIncludesAll());
        Assert.assertFalse(seg.trimLeadingWhitespaces());
        Assert.assertFalse(seg.trimTrailingWhitespaces());
        Assert.assertFalse(seg.treatIsolatedCodesAsWhitespace());
    }

    /**
     * Values passed to {@code setOptions} must be readable back through the getters.
     *
     * <p>The positional order used here (and by the other tests in this class) is:
     * segmentSubFlows, includeStartCodes, includeEndCodes, includeIsolatedCodes,
     * oneSegmentIncludesAll, trimLeadingWS, trimTrailingWS, useJavaRegex, a ninth
     * flag that this test does not read back, treatIsolatedCodesAsWhitespace.
     */
    @Test
    public void testChangedOptions() {
        SRXSegmenter seg = new SRXSegmenter();
        // NOTE(review): the ninth argument (false) has no matching assertion below; confirm its meaning.
        seg.setOptions(false, true, false, true, true, true, true, true, false, true);
        Assert.assertFalse(seg.segmentSubFlows());
        Assert.assertTrue(seg.includeStartCodes());
        Assert.assertFalse(seg.includeEndCodes());
        Assert.assertTrue(seg.includeIsolatedCodes());
        Assert.assertTrue(seg.oneSegmentIncludesAll());
        Assert.assertTrue(seg.trimLeadingWhitespaces());
        Assert.assertTrue(seg.trimTrailingWhitespaces());
        Assert.assertTrue(seg.useJavaRegex());
        Assert.assertTrue(seg.treatIsolatedCodesAsWhitespace());
    }

    /** Plain text with the period-then-whitespace rule and default options. */
    @Test
    public void testSimpleSegmentationDefault() {
        ISegmenter seg = this.createSegmenterWithRules(LocaleId.fromString("en"));
        TextContainer tc = new TextContainer("Part 1.  Part 2. ");
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(3, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, "Part 1.", "  Part 2.", " ");
    }

    /** A non-breaking rule listed before a catch-all breaking rule must suppress breaks between letters/spaces. */
    @Test
    public void testNegativeRules() {
        ISegmenter seg = this.createSegmenterWithRules2(LocaleId.fromString("en"));
        TextContainer tc = new TextContainer("Part A. Part B.");
        ISegments segments = tc.getSegments();
        seg.computeSegments(tc);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, "Part A", ".", " Part B", ".");
    }

    /** Line breaks interleaved with placeholder codes, using the break-after-newline rule. */
    @Test
    public void testSimpleSegmentationNewLines() {
        ISegmenter seg = this.createSegmenterWithNewLineRules(LocaleId.fromString("en"));
        TextFragment tf = new TextFragment();
        // The container is created before the fragment is populated, exactly as in the original test.
        TextContainer tc = new TextContainer(tf);
        tf.append("\n");
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "x", "Part 1."));
        tf.append("\n");
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "x", "Part 2."));
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(3, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, "\n", "Part 1.\n", "Part 2.");
    }

    /** A no-break rule with an empty "before" pattern must keep a dotted date in one segment. */
    @Test
    public void testEmptyBeforeRule() {
        ISegmenter seg = this.createSegmenterWithEmptyBeforeRule(LocaleId.fromString("en"));
        TextContainer tc = new TextContainer("01.07.2014.");
        ISegments segments = tc.getSegments();
        seg.computeSegments(tc);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, "01.07.2014.");
    }

    /** The German abbreviation "d. h." must not be split when using the bundled SRX rule file. */
    @Test
    public void testGermanAbbrNoBreakFullSrx() {
        SRXDocument doc = new SRXDocument();
        FileLocation.In location = FileLocation.fromClass(this.getClass()).in("/language_tools_german.srx");
        doc.loadRules(location.asInputStream());
        ISegmenter seg = doc.compileLanguageRules(LocaleId.GERMAN, null);
        TextUnit tu = new TextUnit("temp", "Aus denen er sch\u00f6pfen konnte d. h. nat\u00fcrlich.");
        tu.createSourceSegmentation(seg);
        assertSegmentTexts(tu.getSource().getSegments(),
                "Aus denen er sch\u00f6pfen konnte d. h. nat\u00fcrlich.");
    }

    /** Same sentence as above, but with two hand-written rules where the no-break rule must win. */
    @Test
    public void testGermanAbbrNoBreakConflictingRules() {
        ISegmenter seg = this.createSegmenterWithGermanNoBreak(LocaleId.GERMAN);
        TextContainer tc = new TextContainer("Aus denen er sch\u00f6pfen konnte d. h. nat\u00fcrlich.");
        ISegments segments = tc.getSegments();
        seg.computeSegments(tc);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, "Aus denen er sch\u00f6pfen konnte d. h. nat\u00fcrlich.");
    }

    /** Builds a segmenter with a single breaking rule: newline before, empty pattern after. */
    private ISegmenter createSegmenterWithNewLineRules(LocaleId locId) {
        SRXDocument doc = newDocument(".*", "default", new Rule("\\n", "", true));
        return doc.compileLanguageRules(locId, null);
    }

    /** With leading-whitespace trimming on, segments start at the first non-space character. */
    @Test
    public void testSimpleSegmentationTrimLeading() {
        SRXSegmenter seg = (SRXSegmenter)this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setOptions(seg.segmentSubFlows(), seg.includeStartCodes(), seg.includeEndCodes(),
                seg.includeIsolatedCodes(), seg.oneSegmentIncludesAll(), true, false,
                seg.useJavaRegex(), false, false);
        TextContainer tc = new TextContainer(" Part 1.  Part 2. ");
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(2, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, "Part 1.", "Part 2.");
    }

    /** With trailing-whitespace trimming on, leading spaces stay and the trailing space is dropped. */
    @Test
    public void testSimpleSegmentationTrimTrailing() {
        SRXSegmenter seg = (SRXSegmenter)this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setOptions(seg.segmentSubFlows(), seg.includeStartCodes(), seg.includeEndCodes(),
                seg.includeIsolatedCodes(), seg.oneSegmentIncludesAll(), false, true,
                seg.useJavaRegex(), false, false);
        TextContainer tc = new TextContainer(" Part 1.  Part 2. ");
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(2, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, " Part 1.", "  Part 2.");
    }

    /** With oneSegmentIncludesAll on, a single segment keeps its surrounding whitespace despite trimming. */
    @Test
    public void testSimpleSegmentationOneIsAll() {
        SRXSegmenter seg = (SRXSegmenter)this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setOptions(seg.segmentSubFlows(), seg.includeStartCodes(), seg.includeEndCodes(),
                seg.includeIsolatedCodes(), true, true, true, seg.useJavaRegex(), false, false);
        TextContainer tc = new TextContainer(" Part 1  ");
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(1, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, " Part 1  ");
    }

    /** Source segmentation of a text unit is reachable through both source accessors. */
    @Test
    public void testTUSegmentation() {
        ITextUnit tu = this.createMultiTargetSegmentedTextUnit();
        ISegments segs = tu.getSource().getSegments();
        assertSegmentTexts(segs, "Part 1.", " Part 2.", " Part 3.");
        segs = tu.getSourceSegments();
        Assert.assertEquals(" Part 2.", segs.get(1).toString());
    }

    /** Coded text, start codes excluded / end codes included / isolated codes excluded. */
    @Test
    public void testCodedSegmentationDefault1() {
        SRXSegmenter seg = (SRXSegmenter)this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setOptions(seg.segmentSubFlows(), false, true, false, false, false, false,
                seg.useJavaRegex(), false, false);
        TextContainer tc = this.createCodedText();
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(5, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments,
                "PH after.",
                "<br/><b> End after.</b>",
                " Start after.",
                "<i> Text.</i>",
                "  ");
    }

    /** Coded text, start codes included / end codes excluded / isolated codes included. */
    @Test
    public void testCodedSegmentationNotDefault1() {
        SRXSegmenter seg = (SRXSegmenter)this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setOptions(seg.segmentSubFlows(), true, false, true, false, false, false,
                seg.useJavaRegex(), false, false);
        TextContainer tc = this.createCodedText();
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(5, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments,
                "PH after.<br/><b>",
                " End after.",
                "</b> Start after.<i>",
                " Text.",
                "</i>  ");
    }

    /** Same options as {@link #testCodedSegmentationDefault1()}, with runs of adjacent codes. */
    @Test
    public void testCodedSegmentationDefault2() {
        SRXSegmenter seg = (SRXSegmenter)this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setOptions(seg.segmentSubFlows(), false, true, false, false, false, false,
                seg.useJavaRegex(), false, false);
        TextContainer tc = this.createCodedText2();
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(5, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments,
                "PH after.",
                "<br/><br/><b><i> End after.</i></b>",
                " Start after.",
                "<u><i> Text.</i></u>",
                "  ");
    }

    /** Same options as {@link #testCodedSegmentationNotDefault1()}, with runs of adjacent codes. */
    @Test
    public void testCodedSegmentationNotDefault2() {
        SRXSegmenter seg = (SRXSegmenter)this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setOptions(seg.segmentSubFlows(), true, false, true, false, false, false,
                seg.useJavaRegex(), false, false);
        TextContainer tc = this.createCodedText2();
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(5, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments,
                "PH after.<br/><br/><b><i>",
                " End after.",
                "</i></b> Start after.<u><i>",
                " Text.",
                "</i></u>  ");
    }

    /** Removing one target must leave the source segmentation untouched. */
    @Test
    public void testTUSegmentationRemoval() {
        ITextUnit tu = this.createMultiTargetSegmentedTextUnit();
        tu.removeTarget(LocaleId.FRENCH);
        ISegments segs = tu.getSource().getSegments();
        Assert.assertEquals(3, segs.count());
        Assert.assertEquals(" Part 2.", segs.get(1).toString());
    }

    /** Removing all segmentations must collapse the source back to one segment holding the full text. */
    @Test
    public void testTUSegmentationRemovalAll() {
        ITextUnit tu = this.createMultiTargetSegmentedTextUnit();
        tu.removeAllSegmentations();
        ISegments segs = tu.getSource().getSegments();
        Assert.assertEquals(1, segs.count());
        Assert.assertEquals("Part 1. Part 2. Part 3.", tu.getSource().getLastContent().toText());
    }

    /**
     * Exercises character classes and escapes in rule patterns against a range of
     * Unicode letters, digits and space characters.
     */
    @Test
    public void testICUSpecificPatterns() {
        SRXDocument doc = newDocument(".*", "default",
                new Rule("\\w", "\\s", true),
                new Rule("\\d", "\\s", true),
                new Rule("\\u301c", "\\s", true),
                new Rule("z", "\\x{0608}", true));
        ISegmenter segmenter = doc.compileLanguageRules(LocaleId.ENGLISH, null);

        // Each of these two-character inputs is expected to yield two segments.
        String[] splitInputs = {
            "e ", "e\u00a0", "e\u1680",
            "\u0104 ", "\u0104\u00a0", "\u0104\u1680",
            "1 ", "\u0b66 ", "\uff19 ",
            "1\u1680", "\u0b66\u1680", "\uff19\u1680",
            "\u301c\u1680",
            "z\u0608",
        };
        for (int i = 0; i < splitInputs.length; ++i) {
            Assert.assertEquals("input #" + i + " [" + splitInputs[i] + "]",
                    2, segmenter.computeSegments(splitInputs[i]));
        }
        // The first character here is matched by none of the "before" patterns, so a single segment is expected.
        Assert.assertEquals(1, segmenter.computeSegments("\u20ac\u1680"));
    }

    /** A trailing space after the final period produces an extra whitespace-only segment. */
    @Test
    public void testWithWithoutTrailingWhitespace() {
        ISegmenter seg = this.createSegmenterWithRules(LocaleId.ENGLISH);
        TextUnit tu = new TextUnit("1", "This sentence should not be split.");
        seg.computeSegments(tu.getSource());
        tu.getSource().getSegments().create(seg.getRanges());
        Assert.assertEquals("[This sentence should not be split.]",
                this.fmt.printSegmentedContent(tu.getSource(), true));

        tu = new TextUnit("1", "This sentence should not be split. ");
        seg.computeSegments(tu.getSource());
        tu.getSource().getSegments().create(seg.getRanges());
        Assert.assertEquals("[This sentence should not be split.][ ]",
                this.fmt.printSegmentedContent(tu.getSource(), true));
    }

    /**
     * Checks {@code storeCodePositions} on a fragment with five placeholder codes, then
     * checks {@code recalcPos} for positions 1 through 11.
     */
    @Test
    public void testStoreCodePositions() {
        SRXSegmenter seg = new SRXSegmenter();
        TextFragment tf = new TextFragment();
        tf.append("ABC");
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "11"));
        tf.append("DE");
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "22"));
        tf.append("FG");
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "33"));
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "44"));
        tf.append("HIJ");
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "55"));
        tf.append("K");
        String text = tf.getCodedText();

        // NOTE(review): typed as List<Integer> because the original cast each element to Integer; confirm against SRXSegmenter.
        List<Integer> codePositions = seg.storeCodePositions(text);
        int[] expectedCodePositions = {3, 5, 7, 7, 10};
        Assert.assertEquals(expectedCodePositions.length, codePositions.size());
        for (int i = 0; i < expectedCodePositions.length; ++i) {
            Assert.assertEquals("code position #" + i,
                    expectedCodePositions[i], codePositions.get(i).intValue());
        }

        List<Integer> originalCodePositions = seg.storeOriginalCodePositions(text);
        // Entry i is the expected result of recalcPos for position i + 1.
        int[] expectedRecalculated = {1, 2, 3, 6, 7, 10, 11, 16, 17, 18, 21};
        for (int i = 0; i < expectedRecalculated.length; ++i) {
            int pos = i + 1;
            Assert.assertEquals("recalcPos for position " + pos,
                    expectedRecalculated[i],
                    seg.recalcPos(text, pos, codePositions, originalCodePositions));
        }
    }

    /** Segmentation of a fragment carrying an inline annotation nested inside a paired code. */
    @Test
    public void testMRK() {
        TextFragment tf = new TextFragment();
        tf.append(TextFragment.TagType.OPENING, "span", "<span>");
        tf.append("Sentence one. Sentence two.");
        tf.append(TextFragment.TagType.CLOSING, "span", "</span>");
        Assert.assertEquals("<span>Sentence one. Sentence two.</span>", tf.toText());

        tf.annotate(3, 28, "anno", new InlineAnnotation("metadata"));
        Assert.assertEquals("<1>S<2>entence one. Sentence two</2>.</1>",
                this.fmt.setContent(tf).toString());

        ISegmenter seg = this.createSegmenterWithRules(LocaleId.ENGLISH);
        TextContainer tc = new TextContainer(tf);
        Assert.assertEquals(2, seg.computeSegments(tc));
        tc.getSegments().create(seg.getRanges());
        Assert.assertEquals("[<span>Sentence one.][ Sentence two.</span>]",
                this.fmt.printSegmentedContent(tc, true, true));
        Assert.assertEquals("[<b1/>S<b2/>entence one.][ Sentence two<e2/>.<e1/>]",
                this.fmt.printSegmentedContent(tc, true, false));
    }

    /** Placeholder codes directly after a period act as whitespace and so trigger a break. */
    @Test
    public void testTreatIsolatedCodesAsWhitespace() {
        ISegmenter seg = this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setTreatIsolatedCodesAsWhitespace(true);
        TextFragment tf = new TextFragment();
        TextContainer tc = new TextContainer(tf);
        tf.append("Hello.");
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "x", "x"));
        tf.append("To the.");
        tf.append(new Code(TextFragment.TagType.PLACEHOLDER, "x", "x"));
        tf.append("World.");
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(3, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, "Hello.", "xTo the.", "xWorld.");
    }

    /** Opening/closing codes are not treated as whitespace even when the option is on. */
    @Test
    public void testDontTreatNonIsolatedCodesAsWhitespace() {
        ISegmenter seg = this.createSegmenterWithRules(LocaleId.fromString("en"));
        seg.setTreatIsolatedCodesAsWhitespace(true);
        TextFragment tf = new TextFragment();
        TextContainer tc = new TextContainer(tf);
        tf.append("Hello.");
        tf.append(new Code(TextFragment.TagType.OPENING, "x", "x"));
        tf.append("To the.");
        tf.append(new Code(TextFragment.TagType.CLOSING, "x", "x"));
        tf.append("World.");
        ISegments segments = tc.getSegments();
        int n = seg.computeSegments(tc);
        Assert.assertEquals(1, n);
        segments.create(seg.getRanges());
        assertSegmentTexts(segments, "Hello.xTo the.xWorld.");
    }

    /** A segmenter constructed directly, without compiled rules, must reject {@code computeSegments}. */
    @Test(expected=SegmentationRuleException.class)
    public void computeSegmentsThrowsSegmentationRuleException() {
        SRXSegmenter segmenter = new SRXSegmenter();
        segmenter.computeSegments("");
    }

    /**
     * Builds one of the named fixture fragments used by the data provider.
     *
     * @param type {@code "with-codes-seven"} for the seven-sentence coded fragment,
     *             {@code "empty-3"} for three spaces; any other value yields {@code "One."}
     * @return a new fragment, never shared between calls
     */
    private static TextFragment getTextFragment(String type) {
        TextFragment tf = new TextFragment();
        switch (type) {
            case "with-codes-seven": {
                tf.append(TextFragment.TagType.OPENING, "x", "<x>");
                tf.append("One.");
                tf.append(TextFragment.TagType.CLOSING, "x", "</x>");
                tf.append(TextFragment.TagType.OPENING, "x", "<x>");
                tf.append("Two.{{ Three. Four. ");
                tf.append(TextFragment.TagType.CLOSING, "x", "</x>");
                tf.append("Five.");
                tf.append(TextFragment.TagType.PLACEHOLDER, "y", "<y/>");
                tf.append("}}Six.");
                tf.append(TextFragment.TagType.PLACEHOLDER, "y", "<y/>");
                tf.append(TextFragment.TagType.OPENING, "x", "<x>");
                tf.append("Seven. ");
                tf.append(TextFragment.TagType.CLOSING, "x", "</x>");
                break;
            }
            case "empty-3": {
                tf.append("   ");
                break;
            }
            default: {
                tf.append("One.");
                break;
            }
        }
        return tf;
    }

    /**
     * Rows for {@link #computeSegmentsWithTreatIsolatedCodesAsWhitespace}.
     *
     * <p>Column order: pattern before the break point, useJavaRegex, includeIsolatedCodes,
     * includeStartCodes, oneSegmentIncludesAll, trimLeadingWS, trimCodes, input fragment,
     * expected segment texts.
     */
    @DataProvider
    public static Object[][] computeSegmentsWithTreatIsolatedCodesAsWhitespaceDataProvider() {
        return new Object[][]{
            {
                "\\b", false, true, false, false, false, false,
                getTextFragment("with-codes-seven"),
                new String[]{
                    "<x>One.</x><x>Two.",
                    "{{ Three. Four. </x>Five.<y/>}}",
                    "Six.<y/><x>Seven. </x>"
                }
            },
            {
                "\\.", false, true, false, false, false, false,
                getTextFragment("with-codes-seven"),
                new String[]{
                    "<x>One.</x>",
                    "<x>Two.",
                    "{{ Three. Four. </x>Five.<y/>}}",
                    "Six.<y/>",
                    "<x>Seven.",
                    " </x>"
                }
            },
            {
                "\\.", true, false, false, false, false, true,
                getTextFragment("with-codes-seven"),
                new String[]{
                    "One.</x>",
                    "Two.",
                    "{{ Three. Four. </x>Five.<y/>}}",
                    "Six.",
                    "Seven.",
                    " </x>"
                }
            },
            {
                "\\.", true, false, true, true, false, false,
                getTextFragment("with-codes-seven"),
                new String[]{
                    "<x>One.</x><x>",
                    "Two.",
                    "{{ Three. Four. </x>Five.<y/>}}",
                    "Six.",
                    "<y/><x>Seven.",
                    " </x>"
                }
            },
            {
                "\\.", true, false, true, true, false, true,
                getTextFragment("with-codes-seven"),
                new String[]{
                    "<x>One.</x><x>",
                    "Two.",
                    "{{ Three. Four. </x>Five.<y/>}}",
                    "Six.",
                    "<x>Seven.",
                    " </x>"
                }
            },
            {
                "\\.", true, false, true, true, false, true,
                getTextFragment("one"),
                new String[]{"One."}
            },
            {
                "  ", false, false, false, false, true, false,
                getTextFragment("empty-3"),
                new String[]{"   "}
            }
        };
    }

    /**
     * Segments the given fragment with treatIsolatedCodesAsWhitespace enabled and the
     * supplied option combination, then compares the segment texts with the expectation.
     */
    @Test
    @UseDataProvider("computeSegmentsWithTreatIsolatedCodesAsWhitespaceDataProvider")
    public void computeSegmentsWithTreatIsolatedCodesAsWhitespace(String patternBeforeBrakePoint, boolean useJavaRegex, boolean includeIsolatedCodes, boolean includeStartCodes, boolean oneSegmentIncludesAll, boolean trimLeadingWS, boolean trimCodes, TextFragment textFragment, String[] expectedSegments) {
        ISegmenter seg = this.createSegmenterWithEmptyAfterRule(LocaleId.fromString("en"), patternBeforeBrakePoint, useJavaRegex);
        seg.setTreatIsolatedCodesAsWhitespace(true);
        seg.setIncludeStartCodes(includeStartCodes);
        seg.setIncludeIsolatedCodes(includeIsolatedCodes);
        seg.setOneSegmentIncludesAll(oneSegmentIncludesAll);
        seg.setTrimLeadingWS(trimLeadingWS);
        seg.setTrimCodes(trimCodes);

        TextContainer tc = new TextContainer(textFragment);
        ISegments segments = tc.getSegments();
        seg.computeSegments(tc);
        segments.create(seg.getRanges());
        Assert.assertThat(segments.count(), CoreMatchers.is(expectedSegments.length));

        // NOTE(review): this second computeSegments call is kept from the original test;
        // its ranges are never applied, so it looks redundant. Confirm before removing.
        seg.computeSegments(tc);
        for (int i = 0; i < expectedSegments.length; ++i) {
            Assert.assertThat(segments.get(i).toString(), CoreMatchers.equalTo(expectedSegments[i]));
        }
    }

    /** Builds a segmenter with one breaking rule: a period before, whitespace after. */
    private ISegmenter createSegmenterWithRules(LocaleId locId) {
        SRXDocument doc = newDocument(".*", "default", new Rule("\\.", "\\s", true));
        return doc.compileLanguageRules(locId, null);
    }

    /** Builds a segmenter with a no-break rule between letters/spaces followed by a break-after-any-character rule. */
    private ISegmenter createSegmenterWithRules2(LocaleId locId) {
        SRXDocument doc = newDocument(".*", "default",
                new Rule("[a-zA-Z ]", "[a-zA-Z ]", false),
                new Rule(".", "", true));
        return doc.compileLanguageRules(locId, null);
    }

    /** Builds a cascading segmenter whose first (no-break) rule has an empty "before" pattern. */
    private ISegmenter createSegmenterWithEmptyBeforeRule(LocaleId locId) {
        SRXDocument doc = newDocument(".*", "default",
                new Rule("", "[a-zA-Z0-9]", false),
                new Rule("\\.", "", true));
        doc.setCascade(true);
        return doc.compileLanguageRules(locId, null);
    }

    /**
     * Builds a cascading segmenter with a single breaking rule whose "after" pattern is
     * empty, plus a mask rule covering double-brace delimited spans.
     *
     * @param locId                   locale to compile the rules for
     * @param patternBeforeBrakePoint "before" pattern of the single breaking rule
     * @param useJavaRegex            currently unused
     */
    private ISegmenter createSegmenterWithEmptyAfterRule(LocaleId locId, String patternBeforeBrakePoint, boolean useJavaRegex) {
        // NOTE(review): useJavaRegex is accepted but never applied to the document or the
        // segmenter, so the data-provider column has no effect. Confirm whether that is intended.
        SRXDocument doc = newDocument(".*", "default", new Rule(patternBeforeBrakePoint, "", true));
        doc.setCascade(true);
        doc.setMaskRule("\\{\\{.*\\}\\}");
        return doc.compileLanguageRules(locId, null);
    }

    /** Builds a cascading German segmenter: no break after a single-letter abbreviation, break after any other period. */
    private ISegmenter createSegmenterWithGermanNoBreak(LocaleId locId) {
        SRXDocument doc = newDocument("de", "German",
                new Rule("\\b\\p{L}\\.", "\\s+", false),
                new Rule("[\\.]", "\\s+", true));
        doc.setCascade(true);
        return doc.compileLanguageRules(locId, null);
    }

    /**
     * Creates a text unit whose source and two targets (German, French) are all
     * segmented with the period-then-whitespace rule.
     */
    private ITextUnit createMultiTargetSegmentedTextUnit() {
        ISegmenter segmenter = this.createSegmenterWithRules(LocaleId.fromString("en"));
        TextUnit tu = new TextUnit("id1", "Part 1. Part 2. Part 3.");
        tu.createSourceSegmentation(segmenter);

        TextContainer german = tu.setTarget(LocaleId.GERMAN, new TextContainer("DE_Part 1. DE_Part 2. DE_Part 3."));
        segmenter.computeSegments(german);
        german.getSegments().create(segmenter.getRanges());

        TextContainer french = tu.setTarget(LocaleId.FRENCH, new TextContainer("FR_Part 1 and part 2. FR_Part 3."));
        segmenter.computeSegments(french);
        french.getSegments().create(segmenter.getRanges());
        return tu;
    }

    /** Four sentences separated by single codes: a placeholder, a bold pair and an italics pair. */
    private TextContainer createCodedText() {
        TextFragment tf = new TextFragment();
        tf.append("PH after.");
        tf.append(TextFragment.TagType.PLACEHOLDER, "break", "<br/>");
        tf.append(TextFragment.TagType.OPENING, "bold", "<b>");
        tf.append(" End after.");
        tf.append(TextFragment.TagType.CLOSING, "bold", "</b>");
        tf.append(" Start after.");
        tf.append(TextFragment.TagType.OPENING, "italics", "<i>");
        tf.append(" Text.");
        tf.append(TextFragment.TagType.CLOSING, "italics", "</i>");
        tf.append("  ");
        return new TextContainer(tf);
    }

    /** Same sentences as {@link #createCodedText()}, but with two adjacent codes at every code position. */
    private TextContainer createCodedText2() {
        TextFragment tf = new TextFragment();
        tf.append("PH after.");
        tf.append(TextFragment.TagType.PLACEHOLDER, "break", "<br/>");
        tf.append(TextFragment.TagType.PLACEHOLDER, "break", "<br/>");
        tf.append(TextFragment.TagType.OPENING, "bold", "<b>");
        tf.append(TextFragment.TagType.OPENING, "italics", "<i>");
        tf.append(" End after.");
        tf.append(TextFragment.TagType.CLOSING, "italics", "</i>");
        tf.append(TextFragment.TagType.CLOSING, "bold", "</b>");
        tf.append(" Start after.");
        tf.append(TextFragment.TagType.OPENING, "under", "<u>");
        tf.append(TextFragment.TagType.OPENING, "italics", "<i>");
        tf.append(" Text.");
        tf.append(TextFragment.TagType.CLOSING, "italics", "</i>");
        tf.append(TextFragment.TagType.CLOSING, "under", "</u>");
        tf.append("  ");
        return new TextContainer(tf);
    }

    /**
     * Creates an {@link SRXDocument} with one language map and one named rule list.
     *
     * @param languagePattern pattern passed to the {@link LanguageMap}
     * @param ruleName        name shared by the language map and the rule list
     * @param rules           rules to register, in order
     * @return the populated document; callers set cascade/mask options and compile it
     */
    private static SRXDocument newDocument(String languagePattern, String ruleName, Rule... rules) {
        SRXDocument doc = new SRXDocument();
        doc.addLanguageMap(new LanguageMap(languagePattern, ruleName));
        // Kept as ArrayList, the type the original code passed to addLanguageRule.
        // NOTE(review): widen to List only if SRXDocument.addLanguageRule accepts it.
        ArrayList<Rule> langRules = new ArrayList<Rule>();
        for (Rule rule : rules) {
            langRules.add(rule);
        }
        doc.addLanguageRule(ruleName, langRules);
        return doc;
    }

    /**
     * Asserts that {@code segments} holds exactly the expected texts, in order.
     *
     * @param segments segments to inspect
     * @param expected expected {@code toString()} value of each segment
     */
    private static void assertSegmentTexts(ISegments segments, String... expected) {
        Assert.assertEquals(expected.length, segments.count());
        for (int i = 0; i < expected.length; ++i) {
            Assert.assertEquals("segment #" + i, expected[i], segments.get(i).toString());
        }
    }
}

