/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.composites;

import de.julielab.genemapper.composites.CompositeToken;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/**
 * Splits a mention string into fine-grained sub-tokens and records, for each
 * sub-token, its character offsets and whether it is directly adjacent to
 * whitespace in the original input.
 */
public class CompositeMentionTokenizer {
    /**
     * Matches one sub-token: a run of ASCII upper-case letters, a run of ASCII
     * lower-case letters, a run of ASCII digits, a single punctuation character
     * or {@code $}, or a run of any other non-whitespace characters. Whitespace
     * itself is never part of a match.
     *
     * <p>Compiled once and shared; {@link Pattern} instances are immutable and
     * safe for concurrent use.
     */
    private static final Pattern SUBTOKEN_PATTERN = Pattern.compile("[A-Z]+|[a-z]+|[0-9]+|[\\p{P}$]|[^A-Za-z0-9\\p{P}\\s]+");

    /**
     * Tokenizes {@code input} into sub-tokens in order of appearance.
     *
     * <p>Each resulting {@link CompositeToken} is constructed from the matched
     * text, its start offset (inclusive), its end offset (exclusive), a flag
     * telling whether the character immediately before the token is
     * whitespace, and a flag telling whether the character immediately after
     * the token is whitespace. A token at the very beginning (or end) of the
     * input has no preceding (or following) character, so the respective flag
     * is {@code false}.
     *
     * @param input the text to tokenize; must not be {@code null}
     * @return a stream of the sub-tokens found in {@code input}; empty if the
     *         input contains no matchable characters
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public Stream<CompositeToken> tokenize(String input) {
        Stream.Builder<CompositeToken> subtokens = Stream.builder();
        Matcher subtokenMatcher = SUBTOKEN_PATTERN.matcher(input);
        while (subtokenMatcher.find()) {
            int start = subtokenMatcher.start();
            int end = subtokenMatcher.end();
            boolean leftWs = start > 0 && Character.isWhitespace(input.charAt(start - 1));
            // 'end' is exclusive, so charAt(end) is the character right after the
            // token and is valid for every end < length. The bound must not be
            // length - 1, otherwise a whitespace that is the last character of
            // the input would be missed.
            boolean rightWs = end < input.length() && Character.isWhitespace(input.charAt(end));
            subtokens.add(new CompositeToken(input.substring(start, end), start, end, leftWs, rightWs));
        }
        return subtokens.build();
    }
}

