package hex;

import java.util.Locale;
import java.util.regex.Pattern;

import water.MRTask;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.parser.BufferedString;

/* loaded from: input_file:hex/RegexTokenizer.class */
/**
 * Task that tokenizes string cells by splitting them on a regular expression.
 *
 * <p>For every input row, each non-NA cell (taken column by column) is read as a
 * string, optionally lowercased, and split on the configured regex. Tokens whose
 * length is at least the configured minimum are appended to the output chunk, and
 * a single NA is appended after each input row's tokens, so NAs act as separators
 * between the token sequences of consecutive rows.
 *
 * <p>Use {@link #RegexTokenizer(String)} for default options (no lowercasing, no
 * minimum length) or {@link Builder} to customize them.
 */
public class RegexTokenizer extends MRTask<RegexTokenizer> {
    /** Regular expression used as the token delimiter. */
    private final String _regex;
    /** Whether each cell is lowercased before it is split. */
    private final boolean _toLowercase;
    /** Minimum token length (inclusive); shorter tokens are dropped. */
    private final int _minLength;

    /**
     * Fluent builder for {@link RegexTokenizer}.
     *
     * <p>Unset options keep their Java defaults: {@code null} regex, no
     * lowercasing, and a minimum length of {@code 0}. A regex must be supplied
     * via {@link #setRegex(String)} before the created tokenizer is run.
     */
    public static class Builder {
        private String _regex;
        private boolean _toLowercase;
        private int _minLength;

        /**
         * Sets the delimiter regular expression.
         *
         * @param str regex on which cell values are split
         * @return this builder
         */
        public Builder setRegex(String str) {
            this._regex = str;
            return this;
        }

        /**
         * Sets whether cell values are lowercased before splitting.
         *
         * @param z {@code true} to lowercase
         * @return this builder
         */
        public Builder setToLowercase(boolean z) {
            this._toLowercase = z;
            return this;
        }

        /**
         * Sets the minimum length a token must have to be emitted.
         *
         * @param i minimum token length, inclusive
         * @return this builder
         */
        public Builder setMinLength(int i) {
            this._minLength = i;
            return this;
        }

        /**
         * Builds a tokenizer from the options set so far.
         *
         * @return a new {@link RegexTokenizer}
         */
        public RegexTokenizer create() {
            return new RegexTokenizer(this._regex, this._toLowercase, this._minLength);
        }
    }

    /**
     * Creates a tokenizer that splits on {@code str}, keeps the original case and
     * emits tokens of any length.
     *
     * @param str delimiter regular expression
     */
    public RegexTokenizer(String str) {
        this(str, false, 0);
    }

    /**
     * Full constructor; reachable through {@link Builder#create()}.
     *
     * @param str delimiter regular expression
     * @param z   whether to lowercase cell values before splitting
     * @param i   minimum token length, inclusive
     */
    private RegexTokenizer(String str, boolean z, int i) {
        this._regex = str;
        this._toLowercase = z;
        this._minLength = i;
    }

    /**
     * Tokenizes every row of the given chunks into {@code newChunk}.
     *
     * <p>Rows are processed in order; within a row the chunks are visited in array
     * order. NA cells are skipped. After each row one NA is appended to mark the
     * end of that row's tokens.
     *
     * @param chunkArr input chunks; the row count is taken from the first chunk
     * @param newChunk output chunk receiving tokens and NA separators
     * @throws NullPointerException if no regex was configured
     * @throws java.util.regex.PatternSyntaxException if the regex is invalid
     */
    @Override // water.MRTask
    public void map(Chunk[] chunkArr, NewChunk newChunk) {
        // Compile the delimiter once per call instead of once per cell.
        // Deliberately a local rather than a field, so the task object carries
        // no state beyond its three configuration values.
        final Pattern delimiter = Pattern.compile(this._regex);
        final BufferedString scratch = new BufferedString();
        final int rowCount = chunkArr[0]._len;
        for (int row = 0; row < rowCount; row++) {
            for (Chunk chunk : chunkArr) {
                if (chunk.isNA(row)) {
                    continue; // NA inputs contribute no tokens
                }
                String text = chunk.atStr(scratch, row).toString();
                if (this._toLowercase) {
                    // Locale.ROOT keeps the result independent of the JVM's
                    // default locale, so the same data tokenizes identically
                    // wherever the task happens to run.
                    text = text.toLowerCase(Locale.ROOT);
                }
                // Pattern.split(text) has the same semantics as text.split(regex).
                for (String token : delimiter.split(text)) {
                    if (token.length() >= this._minLength) {
                        newChunk.addStr(token);
                    }
                }
            }
            // One NA per input row terminates that row's token sequence.
            newChunk.addNA();
        }
    }

    /**
     * Runs this tokenizer over {@code frame} and returns the resulting frame.
     *
     * @param frame input frame whose cells are tokenized
     * @return the output frame produced by the task
     */
    public Frame transform(Frame frame) {
        // NOTE(review): (byte) 2 is presumably the string vec type code
        // (Vec.T_STR) — confirm against water.fvec.Vec.
        return doAll((byte) 2, frame).outputFrame();
    }
}
