001/* 002 * Copyright © 2025 CUI-OpenSource-Software (info@cuioss.de) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package de.cuioss.http.security.validation; 017 018import de.cuioss.http.security.config.SecurityConfiguration; 019import de.cuioss.http.security.core.HttpSecurityValidator; 020import de.cuioss.http.security.core.UrlSecurityFailureType; 021import de.cuioss.http.security.core.ValidationType; 022import de.cuioss.http.security.exceptions.UrlSecurityException; 023import lombok.EqualsAndHashCode; 024import lombok.ToString; 025import org.jspecify.annotations.Nullable; 026 027import java.util.BitSet; 028import java.util.Optional; 029 030/** 031 * Character validation stage that enforces RFC-compliant character sets for HTTP components. 032 * 033 * <p>This stage validates input characters against component-specific allowed character sets, 034 * ensuring compliance with HTTP specifications and preventing character-based security attacks. 035 * It performs comprehensive character validation including null byte detection, control character 036 * filtering, and percent-encoding validation.</p> 037 * 038 * <h3>Design Principles</h3> 039 * <ul> 040 * <li><strong>RFC Compliance</strong> - Enforces RFC 3986 (URI) and RFC 7230 (HTTP) character rules</li> 041 * <li><strong>Security First</strong> - Rejects dangerous characters before any processing</li> 042 * <li><strong>Context Aware</strong> - Different character sets for different HTTP components</li> 043 * <li><strong>Performance Optimized</strong> - Uses BitSet for O(1) character lookups</li> 044 * <li><strong>Configurable</strong> - Allows fine-tuning of character validation rules</li> 045 * </ul> 046 * 047 * <h3>Character Validation Rules</h3> 048 * <ul> 049 * <li><strong>URL Paths</strong> - RFC 3986 unreserved + path-specific characters</li> 050 * <li><strong>Parameters</strong> - RFC 3986 query characters with percent-encoding support</li> 051 * <li><strong>Headers</strong> - RFC 7230 visible ASCII minus delimiters</li> 052 * <li><strong>Cookies</strong> - Restricted character set for cookie safety</li> 053 * <li><strong>Bodies</strong> - Content-type specific character validation</li> 054 * </ul> 055 * 056 * <h3>Security Features</h3> 057 * <ul> 058 * <li><strong>Null Byte Detection</strong> - Prevents null byte injection attacks</li> 059 * <li><strong>Control Character Filtering</strong> - Blocks dangerous control characters</li> 060 * <li><strong>Percent Encoding Validation</strong> - Validates hex digit sequences</li> 061 * <li><strong>High-Bit Character Control</strong> - Configurable handling of non-ASCII characters</li> 062 * </ul> 063 * 064 * <h3>Usage Examples</h3> 065 * <pre> 066 * // Create character validation stage 067 * SecurityConfiguration config = SecurityConfiguration.defaults(); 068 * CharacterValidationStage validator = new CharacterValidationStage(config, ValidationType.URL_PATH); 069 * 070 * // Validate URL path characters 071 * try { 072 * validator.validate("/api/users/123"); // Valid path characters 073 * validator.validate("/api/../etc/passwd"); // May contain invalid traversal patterns 074 * } catch (UrlSecurityException e) { 075 * logger.warn("Invalid characters detected: {}", e.getFailureType()); 076 * } 077 * 078 * // Validate parameter with percent encoding 079 * CharacterValidationStage paramValidator = new CharacterValidationStage(config, ValidationType.PARAMETER_VALUE); 080 * try { 081 * paramValidator.validate("hello%20world"); // Valid percent-encoded space 082 * paramValidator.validate("hello%00world"); // Null byte - will be rejected 083 * } catch (UrlSecurityException e) { 084 * logger.warn("Character validation failed: {}", e.getDetail()); 085 * } 086 * </pre> 087 * 088 * <h3>Configuration Options</h3> 089 * <ul> 090 * <li><strong>allowNullBytes</strong> - Whether to permit null bytes (default: false)</li> 091 * <li><strong>allowControlCharacters</strong> - Whether to permit control characters (default: false)</li> 092 * <li><strong>allowExtendedAscii</strong> - Whether to permit extended ASCII characters (128-255). 093 * <ul> 094 * <li>For URL paths and parameters: Allows characters 128-255 when enabled</li> 095 * <li>For header names and cookies: Always rejected per RFC (setting ignored)</li> 096 * <li>For header values and body: Enables both extended ASCII and Unicode support</li> 097 * <li>Note: Unicode beyond 255 is always rejected for URLs per RFC 3986</li> 098 * </ul> 099 * (default: false)</li> 100 * </ul> 101 * 102 * <h3>Performance Characteristics</h3> 103 * <ul> 104 * <li>O(n) time complexity where n is input length</li> 105 * <li>O(1) character lookup using BitSet</li> 106 * <li>Early termination on first invalid character</li> 107 * <li>Minimal memory allocation during validation</li> 108 * </ul> 109 * 110 * @see CharacterValidationConstants 111 * @see SecurityConfiguration 112 * @see ValidationType 113 * @since 1.0 114 */ 115@EqualsAndHashCode 116@ToString 117public final class CharacterValidationStage implements HttpSecurityValidator { 118 119 private final BitSet allowedChars; 120 private final ValidationType validationType; 121 private final boolean allowPercentEncoding; 122 private final boolean allowNullBytes; 123 private final boolean allowControlCharacters; 124 private final boolean allowExtendedAscii; 125 126 public CharacterValidationStage(SecurityConfiguration config, ValidationType type) { 127 this.validationType = type; 128 this.allowNullBytes = config.allowNullBytes(); 129 this.allowControlCharacters = config.allowControlCharacters(); 130 this.allowExtendedAscii = config.allowExtendedAscii(); 131 // Use the shared BitSet directly - it's read-only after initialization 132 this.allowedChars = CharacterValidationConstants.getCharacterSet(type); 133 134 // Determine if percent encoding is allowed based on type 135 this.allowPercentEncoding = switch (type) { 136 case URL_PATH, PARAMETER_NAME, PARAMETER_VALUE -> true; 137 default -> false; // HEADER_NAME, HEADER_VALUE and others don't allow percent encoding 138 }; 139 } 140 141 @Override 142 @SuppressWarnings("squid:S3516") 143 public Optional<String> validate(@Nullable String value) throws UrlSecurityException { 144 // Quick check for null/empty 145 if (value == null) { 146 return Optional.empty(); 147 } 148 if (value.isEmpty()) { 149 return Optional.of(value); 150 } 151 152 validateCharacters(value); 153 return Optional.of(value); 154 } 155 156 /** 157 * Validates all characters in the input string. 158 * @param value The string to validate 159 * @throws UrlSecurityException if any character validation fails 160 */ 161 private void validateCharacters(String value) throws UrlSecurityException { 162 int i = 0; 163 while (i < value.length()) { 164 char ch = value.charAt(i); 165 166 // Check for null byte FIRST (highest priority security check) 167 if (ch == '\0') { 168 handleNullByte(value, i); 169 } 170 171 // Handle percent encoding 172 if (ch == '%' && allowPercentEncoding) { 173 validatePercentEncoding(value, i); 174 i += 3; // Skip the percent sign and two hex digits 175 continue; 176 } 177 178 // Check if character is allowed based on configuration and character sets 179 if (!isCharacterAllowed(ch)) { 180 handleInvalidCharacter(value, ch, i); 181 } 182 i++; 183 } 184 } 185 186 /** 187 * Handles null byte detection. 188 * @param value The original input string 189 * @param position The position of the null byte 190 * @throws UrlSecurityException if null bytes are not allowed 191 */ 192 private void handleNullByte(String value, int position) throws UrlSecurityException { 193 if (!allowNullBytes) { 194 throw UrlSecurityException.builder() 195 .failureType(UrlSecurityFailureType.NULL_BYTE_INJECTION) 196 .validationType(validationType) 197 .originalInput(value) 198 .detail("Null byte detected at position " + position) 199 .build(); 200 } 201 } 202 203 /** 204 * Handles invalid character detection. 205 * @param value The original input string 206 * @param ch The invalid character 207 * @param position The position of the invalid character 208 * @throws UrlSecurityException for the invalid character 209 */ 210 private void handleInvalidCharacter(String value, char ch, int position) throws UrlSecurityException { 211 UrlSecurityFailureType failureType = getFailureTypeForCharacter(ch); 212 throw UrlSecurityException.builder() 213 .failureType(failureType) 214 .validationType(validationType) 215 .originalInput(value) 216 .detail("Invalid character '" + ch + "' (0x" + Integer.toHexString(ch).toUpperCase() + ") at position " + position) 217 .build(); 218 } 219 220 /** 221 * Validates percent encoding at the given position. 222 * @param value The string to validate 223 * @param position The position of the percent sign 224 * @throws UrlSecurityException if the percent encoding is invalid 225 */ 226 private void validatePercentEncoding(String value, int position) throws UrlSecurityException { 227 // Must be followed by two hex digits 228 if (position + 2 >= value.length()) { 229 throw UrlSecurityException.builder() 230 .failureType(UrlSecurityFailureType.INVALID_ENCODING) 231 .validationType(validationType) 232 .originalInput(value) 233 .detail("Incomplete percent encoding at position " + position) 234 .build(); 235 } 236 237 char hex1 = value.charAt(position + 1); 238 char hex2 = value.charAt(position + 2); 239 if (isNotHexDigit(hex1) || isNotHexDigit(hex2)) { 240 throw UrlSecurityException.builder() 241 .failureType(UrlSecurityFailureType.INVALID_ENCODING) 242 .validationType(validationType) 243 .originalInput(value) 244 .detail("Invalid hex digits in percent encoding at position " + position) 245 .build(); 246 } 247 248 // Check for encoded null byte %00 249 if (hex1 == '0' && hex2 == '0' && !allowNullBytes) { 250 throw UrlSecurityException.builder() 251 .failureType(UrlSecurityFailureType.NULL_BYTE_INJECTION) 252 .validationType(validationType) 253 .originalInput(value) 254 .detail("Encoded null byte (%00) detected at position " + position) 255 .build(); 256 } 257 } 258 259 private boolean isNotHexDigit(char ch) { 260 return !((ch >= '0' && ch <= '9') || 261 (ch >= 'A' && ch <= 'F') || 262 (ch >= 'a' && ch <= 'f')); 263 } 264 265 /** 266 * Checks if a character is allowed based on configuration flags and character sets. 267 */ 268 private boolean isCharacterAllowed(char ch) { 269 // Null byte (0) - should be allowed if configured (already checked earlier but may reach here) 270 if (ch == 0) { 271 return allowNullBytes; 272 } 273 274 // Control characters (1-31, excluding null which is handled above) 275 if (ch <= 31) { 276 // Always allow common whitespace characters that are in the base character set 277 if (allowedChars.get(ch)) { 278 return true; 279 } 280 // Other control characters depend on configuration 281 return allowControlCharacters; 282 } 283 284 // Characters 32-127 (basic ASCII) - check against the base character set 285 if (ch <= 127) { 286 return allowedChars.get(ch); 287 } 288 289 // Extended ASCII characters (128-255) 290 // Different validation types have different rules for extended ASCII 291 if (ch <= 255) { 292 // Header names and cookie names/values must be ASCII-only per RFC 293 if (validationType == ValidationType.HEADER_NAME || 294 validationType == ValidationType.COOKIE_NAME || 295 validationType == ValidationType.COOKIE_VALUE) { 296 return false; // RFC compliance: these must be ASCII-only 297 } 298 // For other types (URL paths, parameters, header values, body), 299 // allow extended ASCII based on configuration 300 return allowExtendedAscii || allowedChars.get(ch); 301 } 302 303 // Unicode characters above 255: 304 // For URLs (paths/parameters): Always rejected per RFC 3986 (ASCII-only) 305 // For headers/body: Allowed if allowExtendedAscii is true (which enables full Unicode support for these contexts) 306 // Always reject combining characters (U+0300-U+036F) as they can cause normalization issues 307 if (ch >= 0x0300 && ch <= 0x036F) { 308 return false; 309 } 310 // The allowExtendedAscii flag controls both extended ASCII and Unicode for applicable validation types 311 return allowExtendedAscii && supportsUnicodeCharacters(); 312 } 313 314 /** 315 * Determines the appropriate failure type for a rejected character. 316 */ 317 private UrlSecurityFailureType getFailureTypeForCharacter(char ch) { 318 // Null byte (0) 319 if (ch == 0) { 320 return UrlSecurityFailureType.NULL_BYTE_INJECTION; 321 } 322 323 // Control characters (1-31) 324 if (ch <= 31) { 325 // For headers, control characters are just invalid characters per RFC 326 // For other contexts, they're specifically flagged as control characters for security 327 if (validationType == ValidationType.HEADER_NAME || validationType == ValidationType.HEADER_VALUE) { 328 return UrlSecurityFailureType.INVALID_CHARACTER; 329 } 330 // If it's in the base character set, it's just an invalid character for this context 331 if (allowedChars.get(ch)) { 332 return UrlSecurityFailureType.INVALID_CHARACTER; 333 } 334 return UrlSecurityFailureType.CONTROL_CHARACTERS; 335 } 336 337 // All other invalid characters (including high-bit and Unicode) 338 return UrlSecurityFailureType.INVALID_CHARACTER; 339 } 340 341 /** 342 * Determines if the current validation type supports Unicode characters beyond 255. 343 * URL paths and parameter validation are restricted to ASCII per RFC 3986, 344 * while body and header content can contain Unicode when allowExtendedAscii is enabled. 345 * 346 * Note: This method works in conjunction with allowExtendedAscii flag: 347 * - For URLs/parameters: Always returns false (ASCII-only per RFC) 348 * - For headers/body: Returns true, allowing Unicode when allowExtendedAscii is enabled 349 */ 350 private boolean supportsUnicodeCharacters() { 351 return switch (validationType) { 352 case BODY -> true; // Body content can contain Unicode 353 case HEADER_VALUE -> true; // Header values can contain Unicode in some cases 354 case URL_PATH, PARAMETER_NAME, PARAMETER_VALUE -> false; // RFC 3986 is ASCII-based 355 case HEADER_NAME -> false; // Header names should be ASCII 356 case COOKIE_NAME, COOKIE_VALUE -> false; // Cookies should be ASCII-safe 357 }; 358 } 359}