001/*
002 * Copyright 2023 the original author or authors.
003 * <p>
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 * <p>
008 * https://www.apache.org/licenses/LICENSE-2.0
009 * <p>
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package de.cuioss.tools.string;
017
018import static de.cuioss.tools.base.Preconditions.checkArgument;
019import static de.cuioss.tools.string.MoreStrings.isEmpty;
020import static de.cuioss.tools.string.MoreStrings.requireNotEmpty;
021import static java.util.Objects.requireNonNull;
022
023import java.util.Collections;
024import java.util.List;
025import java.util.regex.Pattern;
026
027import de.cuioss.tools.collect.CollectionBuilder;
028import de.cuioss.tools.logging.CuiLogger;
029import lombok.AccessLevel;
030import lombok.NonNull;
031import lombok.RequiredArgsConstructor;
032
033/**
034 * Derived from Googles Splitter.
035 * <p>
036 * It uses internally the {@link String#split(String)} implementation of java
037 * and provides a guava like wrapper. This results in an implicit splitting of
038 * the whole String compared to the lazy / deferred splitting of guava. It
039 * focuses and RegEx-based splitting and omits the fixedLength and Map based
040 * variants.
041 * </p>
042 * <h2>Migrating from Guava</h2>
043 * <p>
044 * In order to migrate for most case you only need to replace the package name
045 * on the import. A major different is that the split method provided is
046 * {@link #splitToList(String)}, the variant split() is replaced by it
047 * completely.
048 * </p>
049 * <h2>Changes to Guavas-Splitter</h2>
050 * <p>
051 * It is quite similar to the guava variant but behaves a little different in
052 * certain details, especially in the context of {@link Splitter#limit(int)} and
053 * {@link Splitter#trimResults()}, {@link Splitter#omitEmptyStrings()}. While
054 * guavas version will filter the limit elements after the application of omit /
055 * trim, this version will do it the other way round, resulting in a different
056 * result compared to the guava version.
057 * </p>
058 *
059 * @author Oliver Wolff
060 *
061 */
062@RequiredArgsConstructor(access = AccessLevel.MODULE)
063public final class Splitter {
064
065    private static final CuiLogger log = new CuiLogger(Splitter.class);
066
067    @NonNull
068    private final SplitterConfig splitterConfig;
069
070    /**
071     * Returns a splitter that uses the given fixed string as a separator. For
072     * example, {@code
073     * Splitter.on(", ").split("foo, bar,baz")} returns an iterable containing
074     * {@code ["foo",
075     * "bar,baz"]}.
076     *
077     * @param separator the literal, nonempty string to recognize as a separator
078     *
079     * @return a splitter, with default settings, that recognizes that separator
080     */
081    public static Splitter on(final String separator) {
082        requireNotEmpty(separator);
083        return new Splitter(SplitterConfig.builder().separator(separator).build());
084    }
085
086    /**
087     * Returns a splitter that uses the given fixed string as a separator. For
088     * example, {@code
089     * Splitter.on(", ").split("foo, bar,baz")} returns an iterable containing
090     * {@code ["foo",
091     * "bar,baz"]}.
092     *
093     * @param separator the literal, nonempty string to recognize as a separator
094     *
095     * @return a splitter, with default settings, that recognizes that separator
096     */
097    public static Splitter on(final char separator) {
098        requireNonNull(separator);
099        return new Splitter(SplitterConfig.builder().separator(String.valueOf(separator)).build());
100    }
101
102    /**
103     * Returns a splitter that behaves equivalently to {@code this} splitter, but
104     * automatically omits empty strings from the results. For example, {@code
105     * Splitter.on(',').omitEmptyStrings().split(",a,,,b,c,,")} returns an iterable
106     * containing only {@code ["a", "b", "c"]}.
107     *
108     * <p>
109     * If either {@code trimResults} option is also specified when creating a
110     * splitter, that splitter always trims results first before checking for
111     * emptiness. So, for example, {@code
112     * Splitter.on(':').omitEmptyStrings().trimResults().split(": : : ")} returns an
113     * empty iterable.
114     *
115     * @return a splitter with the desired configuration
116     */
117    public Splitter omitEmptyStrings() {
118        return new Splitter(splitterConfig.copy().omitEmptyStrings(true).build());
119    }
120
121    /**
122     * Usually the separator will be pre-processed before being passed to
123     * {@link String#split(String)}. This is needed to mask special characters that
124     * will harm {@link Pattern#compile(String)}. If you want to disable this
125     * behavior and take care for your self you can change this method by calling
126     * this method.
127     *
128     * @return a splitter with the desired configuration
129     */
130    public Splitter doNotModifySeparatorString() {
131        return new Splitter(splitterConfig.copy().doNotModifySeparatorString(true).build());
132    }
133
134    /**
135     * Returns a splitter that behaves equivalently to {@code this} splitter, but
136     * automatically removes leading and trailing whitespace from each returned
137     * substring. For example,
138     * {@code Splitter.on(',').trimResults().split(" a, b ,c ")} returns an iterable
139     * containing {@code ["a", "b", "c"]}.
140     *
141     * @return a splitter with the desired configuration
142     */
143    public Splitter trimResults() {
144        return new Splitter(splitterConfig.copy().trimResults(true).build());
145    }
146
147    /**
148     * Returns a splitter that behaves equivalently to {@code this} splitter but
149     * stops splitting after it reaches the limit. The limit defines the maximum
150     * number of items returned by the iterator, or the maximum size of the list
151     * returned by {@link #splitToList}.
152     *
153     * <p>
154     * For example, {@code Splitter.on(',').limit(3).split("a,b,c,d")} returns an
155     * iterable containing {@code ["a", "b", "c,d"]}. When omitting empty strings,
156     * the omitted strings do not count. Hence,
157     * {@code Splitter.on(',').limit(3).omitEmptyStrings().split("a,,,b,,,c,d")}
158     * returns an iterable containing {@code ["a", "b", "c,d"}. When trim is
159     * requested, all entries are trimmed, including the last. Hence
160     * {@code Splitter.on(',').limit(3).trimResults().split(" a , b
161     * , c , d ")} results in {@code ["a", "b", "c , d"]}.
162     *
163     * @param maxItems the maximum number of items returned
164     * @return a splitter with the desired configuration
165     */
166    public Splitter limit(int maxItems) {
167        checkArgument(maxItems > 0, "must be greater than zero: %s");
168        return new Splitter(splitterConfig.copy().maxItems(maxItems).build());
169    }
170
171    /**
172     * Splits {@code sequence} into string components and returns them as an
173     * immutable list.
174     *
175     * @param sequence the sequence of characters to split
176     *
177     * @return an immutable list of the segments split from the parameter
178     */
179    public List<String> splitToList(String sequence) {
180        log.trace("Splitting String {} with configuration {}", sequence, splitterConfig);
181        if (isEmpty(sequence)) {
182            return Collections.emptyList();
183        }
184        var splitted = sequence.split(handleSplitCharacter(splitterConfig.getSeparator()),
185                splitterConfig.getMaxItems());
186        if (null == splitted || 0 == splitted.length) {
187            log.trace("No content to be returned for input {} and configuration {}", sequence, splitterConfig);
188            return Collections.emptyList();
189        }
190        var builder = new CollectionBuilder<String>();
191
192        for (String element : splitted) {
193            addIfApplicable(builder, element);
194        }
195        return builder.toImmutableList();
196    }
197
198    private String handleSplitCharacter(String separator) {
199        if (splitterConfig.isDoNotModifySeparatorString()) {
200            return separator;
201        }
202        return Pattern.quote(separator);
203    }
204
205    private void addIfApplicable(CollectionBuilder<String> builder, String element) {
206        if (null == element) {
207            return;
208        }
209        var toDo = element;
210        if (splitterConfig.isTrimResults()) {
211            toDo = toDo.trim();
212        }
213        if (!splitterConfig.isOmitEmptyStrings()) {
214            builder.add(toDo);
215            return;
216        }
217        if (!toDo.isEmpty()) { // Omit empty strings
218            builder.add(toDo);
219        }
220    }
221}