001/* 002 * Copyright 2023 the original author or authors. 003 * <p> 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * <p> 008 * https://www.apache.org/licenses/LICENSE-2.0 009 * <p> 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package de.cuioss.tools.string; 017 018import static de.cuioss.tools.base.Preconditions.checkArgument; 019import static de.cuioss.tools.string.MoreStrings.isEmpty; 020import static de.cuioss.tools.string.MoreStrings.requireNotEmpty; 021import static java.util.Objects.requireNonNull; 022 023import java.util.Collections; 024import java.util.List; 025import java.util.regex.Pattern; 026 027import de.cuioss.tools.collect.CollectionBuilder; 028import de.cuioss.tools.logging.CuiLogger; 029import lombok.AccessLevel; 030import lombok.NonNull; 031import lombok.RequiredArgsConstructor; 032 033/** 034 * Derived from Googles Splitter. 035 * <p> 036 * It uses internally the {@link String#split(String)} implementation of java 037 * and provides a guava like wrapper. This results in an implicit splitting of 038 * the whole String compared to the lazy / deferred splitting of guava. It 039 * focuses and RegEx-based splitting and omits the fixedLength and Map based 040 * variants. 041 * </p> 042 * <h2>Migrating from Guava</h2> 043 * <p> 044 * In order to migrate for most case you only need to replace the package name 045 * on the import. A major different is that the split method provided is 046 * {@link #splitToList(String)}, the variant split() is replaced by it 047 * completely. 048 * </p> 049 * <h2>Changes to Guavas-Splitter</h2> 050 * <p> 051 * It is quite similar to the guava variant but behaves a little different in 052 * certain details, especially in the context of {@link Splitter#limit(int)} and 053 * {@link Splitter#trimResults()}, {@link Splitter#omitEmptyStrings()}. While 054 * guavas version will filter the limit elements after the application of omit / 055 * trim, this version will do it the other way round, resulting in a different 056 * result compared to the guava version. 057 * </p> 058 * 059 * @author Oliver Wolff 060 * 061 */ 062@RequiredArgsConstructor(access = AccessLevel.MODULE) 063public final class Splitter { 064 065 private static final CuiLogger log = new CuiLogger(Splitter.class); 066 067 @NonNull 068 private final SplitterConfig splitterConfig; 069 070 /** 071 * Returns a splitter that uses the given fixed string as a separator. For 072 * example, {@code 073 * Splitter.on(", ").split("foo, bar,baz")} returns an iterable containing 074 * {@code ["foo", 075 * "bar,baz"]}. 076 * 077 * @param separator the literal, nonempty string to recognize as a separator 078 * 079 * @return a splitter, with default settings, that recognizes that separator 080 */ 081 public static Splitter on(final String separator) { 082 requireNotEmpty(separator); 083 return new Splitter(SplitterConfig.builder().separator(separator).build()); 084 } 085 086 /** 087 * Returns a splitter that uses the given fixed string as a separator. For 088 * example, {@code 089 * Splitter.on(", ").split("foo, bar,baz")} returns an iterable containing 090 * {@code ["foo", 091 * "bar,baz"]}. 092 * 093 * @param separator the literal, nonempty string to recognize as a separator 094 * 095 * @return a splitter, with default settings, that recognizes that separator 096 */ 097 public static Splitter on(final char separator) { 098 requireNonNull(separator); 099 return new Splitter(SplitterConfig.builder().separator(String.valueOf(separator)).build()); 100 } 101 102 /** 103 * Returns a splitter that behaves equivalently to {@code this} splitter, but 104 * automatically omits empty strings from the results. For example, {@code 105 * Splitter.on(',').omitEmptyStrings().split(",a,,,b,c,,")} returns an iterable 106 * containing only {@code ["a", "b", "c"]}. 107 * 108 * <p> 109 * If either {@code trimResults} option is also specified when creating a 110 * splitter, that splitter always trims results first before checking for 111 * emptiness. So, for example, {@code 112 * Splitter.on(':').omitEmptyStrings().trimResults().split(": : : ")} returns an 113 * empty iterable. 114 * 115 * @return a splitter with the desired configuration 116 */ 117 public Splitter omitEmptyStrings() { 118 return new Splitter(splitterConfig.copy().omitEmptyStrings(true).build()); 119 } 120 121 /** 122 * Usually the separator will be pre-processed before being passed to 123 * {@link String#split(String)}. This is needed to mask special characters that 124 * will harm {@link Pattern#compile(String)}. If you want to disable this 125 * behavior and take care for your self you can change this method by calling 126 * this method. 127 * 128 * @return a splitter with the desired configuration 129 */ 130 public Splitter doNotModifySeparatorString() { 131 return new Splitter(splitterConfig.copy().doNotModifySeparatorString(true).build()); 132 } 133 134 /** 135 * Returns a splitter that behaves equivalently to {@code this} splitter, but 136 * automatically removes leading and trailing whitespace from each returned 137 * substring. For example, 138 * {@code Splitter.on(',').trimResults().split(" a, b ,c ")} returns an iterable 139 * containing {@code ["a", "b", "c"]}. 140 * 141 * @return a splitter with the desired configuration 142 */ 143 public Splitter trimResults() { 144 return new Splitter(splitterConfig.copy().trimResults(true).build()); 145 } 146 147 /** 148 * Returns a splitter that behaves equivalently to {@code this} splitter but 149 * stops splitting after it reaches the limit. The limit defines the maximum 150 * number of items returned by the iterator, or the maximum size of the list 151 * returned by {@link #splitToList}. 152 * 153 * <p> 154 * For example, {@code Splitter.on(',').limit(3).split("a,b,c,d")} returns an 155 * iterable containing {@code ["a", "b", "c,d"]}. When omitting empty strings, 156 * the omitted strings do not count. Hence, 157 * {@code Splitter.on(',').limit(3).omitEmptyStrings().split("a,,,b,,,c,d")} 158 * returns an iterable containing {@code ["a", "b", "c,d"}. When trim is 159 * requested, all entries are trimmed, including the last. Hence 160 * {@code Splitter.on(',').limit(3).trimResults().split(" a , b 161 * , c , d ")} results in {@code ["a", "b", "c , d"]}. 162 * 163 * @param maxItems the maximum number of items returned 164 * @return a splitter with the desired configuration 165 */ 166 public Splitter limit(int maxItems) { 167 checkArgument(maxItems > 0, "must be greater than zero: %s"); 168 return new Splitter(splitterConfig.copy().maxItems(maxItems).build()); 169 } 170 171 /** 172 * Splits {@code sequence} into string components and returns them as an 173 * immutable list. 174 * 175 * @param sequence the sequence of characters to split 176 * 177 * @return an immutable list of the segments split from the parameter 178 */ 179 public List<String> splitToList(String sequence) { 180 log.trace("Splitting String {} with configuration {}", sequence, splitterConfig); 181 if (isEmpty(sequence)) { 182 return Collections.emptyList(); 183 } 184 var splitted = sequence.split(handleSplitCharacter(splitterConfig.getSeparator()), 185 splitterConfig.getMaxItems()); 186 if (null == splitted || 0 == splitted.length) { 187 log.trace("No content to be returned for input {} and configuration {}", sequence, splitterConfig); 188 return Collections.emptyList(); 189 } 190 var builder = new CollectionBuilder<String>(); 191 192 for (String element : splitted) { 193 addIfApplicable(builder, element); 194 } 195 return builder.toImmutableList(); 196 } 197 198 private String handleSplitCharacter(String separator) { 199 if (splitterConfig.isDoNotModifySeparatorString()) { 200 return separator; 201 } 202 return Pattern.quote(separator); 203 } 204 205 private void addIfApplicable(CollectionBuilder<String> builder, String element) { 206 if (null == element) { 207 return; 208 } 209 var toDo = element; 210 if (splitterConfig.isTrimResults()) { 211 toDo = toDo.trim(); 212 } 213 if (!splitterConfig.isOmitEmptyStrings()) { 214 builder.add(toDo); 215 return; 216 } 217 if (!toDo.isEmpty()) { // Omit empty strings 218 builder.add(toDo); 219 } 220 } 221}