001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.jexl2.parser;
018
/**
 * Common constant strings utilities.
 * <p>
 * The methods of this class read JEXL string literals and handle escaping through the
 * 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single
 * and double quotes) and to read Unicode hexadecimal encoded characters.
 * </p>
 * <p>
 * The only escapable characters are the single and double quotes - ''' and '"' -,
 * a Unicode sequence starting with 'u' followed by 4 hexadecimal digits and
 * the backslash character - '\' - itself.
 * </p>
 * <p>
 * A sequence where '\' occurs before any non-escapable character or sequence has no effect, the
 * sequence output being the same as the input.
 * </p>
 */
public class StringParser {
    /** Default constructor. */
    public StringParser() {}

    /**
     * Builds a string, handles escaping through '\' syntax.
     * <p>
     * When <code>eatsep</code> is true, the first character is taken as the separator and both
     * the first and the last characters of the input are left out of the result.
     * </p>
     * @param str the string to build from
     * @param eatsep whether the separator, the first character, should be considered
     * @return the built string
     */
    public static String buildString(CharSequence str, boolean eatsep) {
        StringBuilder strb = new StringBuilder(str.length());
        char sep = eatsep ? str.charAt(0) : 0;
        int end = str.length() - (eatsep ? 1 : 0);
        int begin = (eatsep ? 1 : 0);
        read(strb, str, begin, end, sep);
        return strb.toString();
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * <p>
     * The unescaped separator that ends the read is copied into the buffer as well.
     * </p>
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param index the offset into the origin
     * @param sep the separator, single or double quote, marking end of string
     * @return the offset in origin of the separator that stopped the read,
     * or the length of the origin when no unescaped separator was found
     */
    public static int readString(StringBuilder strb, CharSequence str, int index, char sep) {
        return read(strb, str, index, str.length(), sep);
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * <p>
     * A backslash that is the very last character read is dropped since there is nothing
     * after it to escape.
     * </p>
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param begin the relative offset in str to begin reading
     * @param end the relative offset in str to end reading
     * @param sep the separator, single or double quote, marking end of string
     * @return the last character offset handled in origin: the offset of the unescaped
     * separator when one was met, the end offset otherwise
     */
    private static int read(StringBuilder strb, CharSequence str, int begin, int end, char sep) {
        boolean escape = false;
        int index = begin;
        for (; index < end; ++index) {
            char c = str.charAt(index);
            if (escape) {
                // the 4 digits following the 'u' must all lie before end
                if (c == 'u' && (index + 4) < end && readUnicodeChar(strb, str, index + 1) > 0) {
                    // skip the 4 hexadecimal digits that were just consumed
                    index += 4;
                } else {
                    // if c is not an escapable character, re-emit the backslash before it;
                    // without an explicit separator, both quote characters are escapable
                    boolean notSeparator = sep == 0 ? c != '\'' && c != '"' : c != sep;
                    if (notSeparator && c != '\\') {
                        strb.append('\\');
                    }
                    strb.append(c);
                }
                escape = false;
                continue;
            }
            if (c == '\\') {
                escape = true;
                continue;
            }
            strb.append(c);
            if (c == sep) {
                // unescaped separator: it has been copied, stop here
                break;
            }
        }
        return index;
    }

    /**
     * Reads a Unicode escape character.
     * <p>
     * Nothing is written to the builder unless all 4 characters are hexadecimal digits
     * (0-9, a-f, A-F).
     * </p>
     * @param strb the builder to write the character to
     * @param str the sequence
     * @param begin the begin offset in sequence (after the '\\u')
     * @return 0 if char could not be read, 4 otherwise
     */
    private static int readUnicodeChar(StringBuilder strb, CharSequence str, int begin) {
        int code = 0;
        // most significant nibble first
        int bits = 12;
        for (int offset = 0; offset < 4; ++offset) {
            char c = str.charAt(begin + offset);
            int value;
            if (c >= '0' && c <= '9') {
                value = c - '0';
            } else if (c >= 'a' && c <= 'f') {
                value = c - 'a' + 10;
            } else if (c >= 'A' && c <= 'F') {
                value = c - 'A' + 10;
            } else {
                // not a hexadecimal digit: the caller re-emits the sequence unchanged
                return 0;
            }
            code |= value << bits;
            bits -= 4;
        }
        strb.append((char) code);
        return 4;
    }

    /**
     * Escapes a String representation, expand non-ASCII characters as Unicode escape sequence.
     * <p>
     * The result is delimited by single quotes; single quotes and backslashes of the input are
     * escaped by a backslash and any character whose code is 127 or above is written as a
     * backslash, a 'u' and 4 lowercase hexadecimal digits.
     * </p>
     * @param str the string to escape
     * @return the escaped representation, null if the input is null
     */
    public static String escapeString(String str) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        StringBuilder strb = new StringBuilder(length + 2);
        strb.append('\'');
        for (int i = 0; i < length; ++i) {
            char c = str.charAt(i);
            if (c < 127) {
                if (c == '\'') {
                    // escape quote
                    strb.append('\\');
                    strb.append('\'');
                } else if (c == '\\') {
                    // escape backslash
                    strb.append('\\');
                    strb.append('\\');
                } else {
                    strb.append(c);
                }
            } else {
                // convert to Unicode escape sequence
                strb.append('\\');
                strb.append('u');
                String hex = Integer.toHexString(c);
                // left-pad with zeros up to 4 digits
                for (int h = hex.length(); h < 4; ++h) {
                    strb.append('0');
                }
                strb.append(hex);
            }
        }
        strb.append('\'');
        return strb.toString();
    }
}