001/* 002 * Copyright © 2025 CUI-OpenSource-Software (info@cuioss.de) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package de.cuioss.http.security.validation; 017 018import de.cuioss.http.security.config.SecurityConfiguration; 019import de.cuioss.http.security.core.HttpSecurityValidator; 020import de.cuioss.http.security.core.UrlSecurityFailureType; 021import de.cuioss.http.security.core.ValidationType; 022import de.cuioss.http.security.exceptions.UrlSecurityException; 023import org.jspecify.annotations.Nullable; 024 025import java.util.ArrayList; 026import java.util.List; 027import java.util.Optional; 028import java.util.function.Predicate; 029import java.util.regex.Pattern; 030 031/** 032 * Path normalization validation stage with security checks. 033 * 034 * <p>This stage performs RFC 3986 Section 5.2.4 path normalization to resolve 035 * relative path segments (. and ..) while detecting and preventing path traversal 036 * attacks. The stage processes paths through multiple security layers:</p> 037 * 038 * <ol> 039 * <li><strong>Segment Parsing</strong> - Splits path into segments for processing</li> 040 * <li><strong>Normalization</strong> - Resolves . and .. segments according to RFC 3986</li> 041 * <li><strong>Security Validation</strong> - Detects remaining traversal attempts</li> 042 * <li><strong>Root Escape Detection</strong> - Prevents escaping application root</li> 043 * </ol> 044 * 045 * <h3>Design Principles</h3> 046 * <ul> 047 * <li><strong>RFC Compliance</strong> - Follows RFC 3986 path normalization rules</li> 048 * <li><strong>Security First</strong> - Detects attacks through normalization analysis</li> 049 * <li><strong>DoS Protection</strong> - Prevents excessive nesting and recursion attacks</li> 050 * <li><strong>Thread Safety</strong> - Safe for concurrent use across multiple threads</li> 051 * </ul> 052 * 053 * <h3>Security Validations</h3> 054 * <ul> 055 * <li><strong>Path Traversal</strong> - Detects ../ patterns that remain after normalization</li> 056 * <li><strong>Root Escape</strong> - Prevents paths from escaping the application root</li> 057 * <li><strong>Excessive Nesting</strong> - Limits path depth to prevent resource exhaustion</li> 058 * <li><strong>Malicious Patterns</strong> - Identifies suspicious path construction</li> 059 * </ul> 060 * 061 * <h3>Usage Examples</h3> 062 * <pre> 063 * // Create normalization stage 064 * SecurityConfiguration config = SecurityConfiguration.defaults(); 065 * NormalizationStage normalizer = new NormalizationStage(config, ValidationType.URL_PATH); 066 * 067 * // Normalize legitimate path 068 * String normalized = normalizer.validate("/api/users/./123/../456"); 069 * // Returns: "/api/users/456" 070 * 071 * // Detect path traversal attack 072 * try { 073 * normalizer.validate("/api/../../etc/passwd"); 074 * // Throws UrlSecurityException with DIRECTORY_ESCAPE_ATTEMPT 075 * } catch (UrlSecurityException e) { 076 * logger.warn("Path traversal blocked: {}", e.getFailureType()); 077 * } 078 * 079 * // Detect excessive nesting attack 080 * try { 081 * normalizer.validate("/a/../b/../c/../d/../e/../f/../g/../h/../i/../j/../k/../l/../m/../n/../o/../p/../q/../r/../s/../t"); 082 * // Throws UrlSecurityException with EXCESSIVE_NESTING 083 * } catch (UrlSecurityException e) { 084 * logger.warn("DoS attack blocked: {}", e.getFailureType()); 085 * } 086 * </pre> 087 * 088 * <h3>Performance Characteristics</h3> 089 * <ul> 090 * <li>O(n) time complexity where n is the number of path segments</li> 091 * <li>Single pass through path segments with early termination</li> 092 * <li>Minimal memory allocation - reuses StringBuilder</li> 093 * <li>DoS protection through segment counting</li> 094 * </ul> 095 * 096 * <h3>RFC 3986 Compliance</h3> 097 * <p>This implementation follows RFC 3986 Section 5.2.4 "Remove Dot Segments":</p> 098 * <ul> 099 * <li>Single dot segments (.) are removed</li> 100 * <li>Double dot segments (..) remove the previous segment</li> 101 * <li>Trailing slashes are preserved</li> 102 * <li>Leading slashes are preserved</li> 103 * </ul> 104 * <p> 105 * Implements: Task V2 from HTTP verification specification 106 * 107 * @param config Security configuration controlling validation behavior. 108 * @param validationType Type of validation being performed (URL_PATH, PARAMETER_NAME, etc.). 109 * @see HttpSecurityValidator 110 * @see SecurityConfiguration 111 * @see ValidationType 112 * @since 1.0 113 */ 114public record NormalizationStage(SecurityConfiguration config, 115ValidationType validationType) implements HttpSecurityValidator { 116 117 /** 118 * Maximum number of path segments to prevent DoS attacks. 119 * This limit prevents excessive processing time from deeply nested paths. 120 */ 121 private static final int MAX_PATH_SEGMENTS = 1000; 122 123 /** 124 * Maximum directory depth to prevent excessive nesting attacks. 125 * Based on common filesystem and application server limits. 126 */ 127 private static final int MAX_DIRECTORY_DEPTH = 100; 128 129 /** 130 * Precompiled pattern to detect URLs with protocol schemes. 131 * Matches RFC 3986 scheme format: scheme://authority/path 132 * Used to prevent normalization of protocol portions in URLs. 133 */ 134 private static final Pattern URL_WITH_PROTOCOL_PATTERN = Pattern.compile("^[a-zA-Z][a-zA-Z0-9+.-]*://.*"); 135 136 /** 137 * Pattern to detect suspicious single-component directory traversal. 138 * Matches patterns like "valid/../segment" where a single path segment (not starting with ..) 139 * precedes "../" and is followed by another path segment. 140 * Updated to handle RFC 3986 allowed characters including dots, tildes, and sub-delimiters. 141 */ 142 static final Pattern SINGLE_COMPONENT_TRAVERSAL_PATTERN = Pattern.compile("^(?!\\.\\./)[^/\\\\]+/\\.\\./[^/\\\\]+$"); 143 144 /** 145 * Pattern to detect multiple consecutive dots with path separators. 146 * Matches patterns like ".../" or "...\\" which could be traversal bypass attempts. 147 * Does not match legitimate filenames like "file...txt". 148 * Uses .find() with simple pattern to prevent ReDoS attacks. 149 */ 150 static final Pattern MULTIPLE_DOTS_WITH_SEPARATOR_PATTERN = Pattern.compile("\\.{3,}[/\\\\]"); 151 152 /** 153 * Pattern for splitting paths on forward slash or backslash separators. 154 * Used for parsing path segments during traversal detection. 155 */ 156 static final Pattern PATH_SEPARATOR_PATTERN = Pattern.compile("[/\\\\]"); 157 158 /** 159 * Pattern to detect paths ending with "/..". 160 * Matches paths that end with forward slash followed by double dot. 161 */ 162 static final Pattern ENDS_WITH_SLASH_DOTDOT_PATTERN = Pattern.compile(".*/\\.\\.$"); 163 164 /** 165 * Pattern to detect paths starting with "../". 166 * Matches paths that begin with double dot followed by forward slash. 167 */ 168 static final Pattern STARTS_WITH_DOTDOT_SLASH_PATTERN = Pattern.compile("^\\.\\./.*"); 169 170 /** 171 * Pattern to detect paths starting with "..\\". 172 * Matches paths that begin with double dot followed by backslash. 173 */ 174 static final Pattern STARTS_WITH_DOTDOT_BACKSLASH_PATTERN = Pattern.compile("^\\.\\.\\\\..*"); 175 176 /** 177 * Pattern to detect internal slash-dotdot patterns. 178 * Matches "/" followed by ".." only when it's a directory traversal (followed by "/" or end of string). 179 * This avoids false positives for filenames starting with ".." like "a/..c" 180 * Optimized for .find() usage without unnecessary .* wrappers. 181 */ 182 static final Pattern CONTAINS_SLASH_DOTDOT_PATTERN = Pattern.compile("/\\.\\.(?:/|$)"); 183 184 /** 185 * Pattern to detect internal dotdot-backslash patterns. 186 * Matches ".." followed by "\\" anywhere in the path. 187 * Used in conjunction with STARTS_WITH_DOTDOT_BACKSLASH_PATTERN to exclude initial "..\\". 188 * Optimized for .find() usage without unnecessary .* wrappers. 189 */ 190 static final Pattern CONTAINS_DOTDOT_BACKSLASH_PATTERN = Pattern.compile("\\.\\.\\\\"); 191 192 193 /** 194 * Validates and normalizes a path with comprehensive security checks. 195 * 196 * <p>Processing stages:</p> 197 * <ol> 198 * <li>Input validation - handles null/empty inputs</li> 199 * <li>Path segment parsing - splits on directory separators</li> 200 * <li>RFC 3986 normalization - resolves . and .. segments</li> 201 * <li>Security validation - detects remaining attack patterns</li> 202 * </ol> 203 * 204 * @param value The input path to validate and normalize 205 * @return The validated and normalized path wrapped in Optional, or Optional.empty() if input was null 206 * @throws UrlSecurityException if any security violations are detected: 207 * <ul> 208 * <li>EXCESSIVE_NESTING - if path contains too many segments or depth</li> 209 * <li>PATH_TRAVERSAL_DETECTED - if ../ patterns remain after normalization</li> 210 * <li>DIRECTORY_ESCAPE_ATTEMPT - if normalized path tries to escape root</li> 211 * </ul> 212 */ 213 @Override 214 public Optional<String> validate(@Nullable String value) throws UrlSecurityException { 215 if (value == null) { 216 return Optional.empty(); 217 } 218 if (value.isEmpty()) { 219 return Optional.of(value); 220 } 221 222 // Save original for comparison and error reporting 223 @SuppressWarnings("UnnecessaryLocalVariable") // Used in exception handling below 224 String original = value; 225 226 // LAYER 1: Semantic Intent Validation - Detect directory traversal patterns BEFORE normalization 227 // This follows OWASP/CISA best practices for defense in depth 228 if (containsDirectoryTraversalIntent(original)) { 229 throw UrlSecurityException.builder() 230 .failureType(UrlSecurityFailureType.PATH_TRAVERSAL_DETECTED) 231 .validationType(validationType) 232 .originalInput(original) 233 .detail("Directory traversal pattern detected in input") 234 .build(); 235 } 236 237 // Normalize URI components (resolve . and .. in path segments) 238 String normalized = normalizeUriComponent(value); 239 240 // Check if path escapes root after normalization (check first for proper precedence) 241 if (escapesRoot(normalized)) { 242 throw UrlSecurityException.builder() 243 .failureType(UrlSecurityFailureType.DIRECTORY_ESCAPE_ATTEMPT) 244 .validationType(validationType) 245 .originalInput(original) 246 .sanitizedInput(normalized) 247 .detail("Path attempts to escape root directory") 248 .build(); 249 } 250 251 // LAYER 2: Syntactic Validation - Check for remaining traversal patterns after normalization 252 if (containsInternalPathTraversal(normalized)) { 253 throw UrlSecurityException.builder() 254 .failureType(UrlSecurityFailureType.PATH_TRAVERSAL_DETECTED) 255 .validationType(validationType) 256 .originalInput(original) 257 .sanitizedInput(normalized) 258 .detail("Path normalization revealed traversal attempt") 259 .build(); 260 } 261 262 return Optional.of(normalized); 263 } 264 265 /** 266 * Normalizes URI components according to RFC 3986 with DoS protection. 267 * 268 * <p>This method implements RFC 3986 Section 5.2.4 "Remove Dot Segments" algorithm 269 * for path components, while preserving complete URIs with protocol schemes. 270 * Includes additional security measures to prevent resource exhaustion attacks.</p> 271 * 272 * @param uriComponent The URI component to normalize (path segment or complete URI) 273 * @return The normalized URI component 274 * @throws UrlSecurityException if processing limits are exceeded 275 */ 276 private String normalizeUriComponent(String uriComponent) { 277 // Check if this is a complete URI with protocol - don't normalize protocol portion 278 if (URL_WITH_PROTOCOL_PATTERN.matcher(uriComponent).matches()) { 279 return uriComponent; 280 } 281 282 // RFC 3986 path segment normalization with recursion protection 283 String[] segments = uriComponent.split("/", -1); 284 List<String> outputSegments = new ArrayList<>(); 285 boolean isAbsolute = uriComponent.startsWith("/"); 286 287 // Validate segment count 288 validateSegmentCount(segments.length, uriComponent); 289 290 // Process each segment 291 for (String segment : segments) { 292 processPathSegment(segment, outputSegments, isAbsolute, uriComponent); 293 } 294 295 // Build and return normalized path 296 return buildNormalizedPath(outputSegments, isAbsolute, uriComponent); 297 } 298 299 /** 300 * Validates that the segment count does not exceed security limits. 301 * 302 * @param segmentCount Number of path segments 303 * @param originalInput Original input for error reporting 304 * @throws UrlSecurityException if segment count exceeds limits 305 */ 306 private void validateSegmentCount(int segmentCount, String originalInput) { 307 if (segmentCount > MAX_PATH_SEGMENTS) { 308 throw UrlSecurityException.builder() 309 .failureType(UrlSecurityFailureType.EXCESSIVE_NESTING) 310 .validationType(validationType) 311 .originalInput(originalInput) 312 .detail("Path contains too many segments: " + segmentCount + " (max: " + MAX_PATH_SEGMENTS + ")") 313 .build(); 314 } 315 } 316 317 /** 318 * Processes a single path segment according to RFC 3986 normalization rules. 319 * 320 * @param segment Path segment to process 321 * @param outputSegments Current output segments list 322 * @param isAbsolute Whether this is an absolute path 323 * @param originalInput Original input for error reporting 324 * @throws UrlSecurityException if depth limits are exceeded 325 */ 326 private void processPathSegment(String segment, List<String> outputSegments, boolean isAbsolute, String originalInput) { 327 switch (segment) { 328 case "." -> { 329 // Current directory - skip (RFC 3986 Section 5.2.4) 330 } 331 case ".." -> { 332 // Parent directory 333 if (!outputSegments.isEmpty() && !"..".equals(outputSegments.getLast())) { 334 // Can resolve this .. by removing the previous segment 335 outputSegments.removeLast(); 336 } else if (!isAbsolute) { 337 // For relative paths, keep .. if we can't resolve it 338 outputSegments.add(".."); 339 } 340 // For absolute paths, .. at root is ignored 341 } 342 case "" -> { 343 // Empty segment - only preserve for leading slash or trailing slash 344 // Skip empty segments from double slashes in the middle 345 } 346 default -> { 347 // Normal segment 348 outputSegments.add(segment); 349 validateDirectoryDepth(outputSegments.size(), originalInput); 350 } 351 } 352 } 353 354 /** 355 * Validates that directory depth does not exceed security limits. 356 * 357 * @param currentDepth Current directory depth 358 * @param originalInput Original input for error reporting 359 * @throws UrlSecurityException if depth exceeds limits 360 */ 361 private void validateDirectoryDepth(int currentDepth, String originalInput) { 362 if (currentDepth > MAX_DIRECTORY_DEPTH) { 363 throw UrlSecurityException.builder() 364 .failureType(UrlSecurityFailureType.EXCESSIVE_NESTING) 365 .validationType(validationType) 366 .originalInput(originalInput) 367 .detail("Path depth " + currentDepth + " exceeds maximum " + MAX_DIRECTORY_DEPTH) 368 .build(); 369 } 370 } 371 372 /** 373 * Builds the normalized path string from processed segments. 374 * 375 * @param outputSegments Processed path segments 376 * @param isAbsolute Whether this is an absolute path 377 * @param originalInput Original input for trailing slash preservation 378 * @return Normalized path string 379 */ 380 private String buildNormalizedPath(List<String> outputSegments, boolean isAbsolute, String originalInput) { 381 StringBuilder result = new StringBuilder(); 382 383 // Add leading slash for absolute paths 384 if (isAbsolute) { 385 result.append("/"); 386 } 387 388 // Add segments 389 for (int i = 0; i < outputSegments.size(); i++) { 390 if (i > 0) { 391 result.append("/"); 392 } 393 result.append(outputSegments.get(i)); 394 } 395 396 // Preserve trailing slash if present and we have content, or for root path 397 if (originalInput.endsWith("/") && !result.toString().endsWith("/") && (!outputSegments.isEmpty() || isAbsolute)) { 398 result.append("/"); 399 } 400 401 return result.toString(); 402 } 403 404 /** 405 * Detects directory traversal intent patterns in the original input before normalization. 406 * 407 * <p>This method implements semantic validation following OWASP/CISA best practices 408 * for defense in depth. It identifies patterns that indicate malicious directory 409 * navigation intent, such as "valid/../segment", regardless of normalization outcome.</p> 410 * 411 * <p>Based on research analysis of CVEs: 412 * <a href="https://nvd.nist.gov/vuln/detail/CVE-2021-41773">CVE-2021-41773</a>, 413 * <a href="https://nvd.nist.gov/vuln/detail/CVE-2021-42013">CVE-2021-42013</a>, 414 * <a href="https://nvd.nist.gov/vuln/detail/CVE-2024-38819">CVE-2024-38819</a> 415 * and industry best practices, patterns like "directory/../target" represent attack 416 * fingerprints that should be rejected semantically before syntactic processing.</p> 417 * 418 * @param input The original input path to analyze for traversal intent 419 * @return true if the input contains directory traversal patterns indicating malicious intent 420 */ 421 private boolean containsDirectoryTraversalIntent(String input) { 422 // Based on research, focus on specific attack patterns while allowing legitimate RFC 3986 navigation 423 424 // Pattern 1: Suspicious single-component traversal patterns 425 // This targets cases like "valid/../segment" where a single word precedes "../" 426 // but allows legitimate multi-level paths like "/api/users/../admin" 427 if (SINGLE_COMPONENT_TRAVERSAL_PATTERN.matcher(input).matches()) { 428 return true; 429 } 430 431 // Pattern 2: Encoded traversal attempts (based on Apache CVE research) 432 // Covers URL encoded variants like "..%2e/" or "%2e%2e/" 433 if (input.contains("..%") || input.contains("%2e%2e") || input.contains("%2E%2E")) { 434 return true; 435 } 436 437 // Pattern 3: Multiple consecutive dots with separators (traversal bypass attempts) 438 // Covers ".../" but NOT "file...txt" 439 if (MULTIPLE_DOTS_WITH_SEPARATOR_PATTERN.matcher(input).find()) { 440 return true; 441 } 442 443 // Pattern 4: Windows-style backslash traversal (but not if it starts with ..) 444 // Patterns starting with .. should be handled by escapesRoot check 445 return CONTAINS_DOTDOT_BACKSLASH_PATTERN.matcher(input).find() && 446 !STARTS_WITH_DOTDOT_BACKSLASH_PATTERN.matcher(input).matches(); 447 } 448 449 /** 450 * Checks if the normalized path contains internal path traversal patterns. 451 * 452 * <p>After proper normalization, there should be no remaining .. segments 453 * except at the beginning for relative paths (which is handled by escapesRoot). 454 * This method performs comprehensive checks for any remaining traversal patterns 455 * that could indicate incomplete normalization or sophisticated attacks.</p> 456 * 457 * @param path The normalized path to check 458 * @return true if path contains internal traversal patterns 459 */ 460 private boolean containsInternalPathTraversal(String path) { 461 // After normalization, check for .. segments that aren't at the start 462 if (CONTAINS_SLASH_DOTDOT_PATTERN.matcher(path).find()) { 463 return true; 464 } 465 466 // For backslash patterns, exclude those starting with ..\\ (handled by escapesRoot) 467 if (CONTAINS_DOTDOT_BACKSLASH_PATTERN.matcher(path).find() && 468 !STARTS_WITH_DOTDOT_BACKSLASH_PATTERN.matcher(path).matches()) { 469 return true; 470 } 471 472 // Check for .. at end of path (without leading ../) 473 if (ENDS_WITH_SLASH_DOTDOT_PATTERN.matcher(path).matches() && 474 !STARTS_WITH_DOTDOT_SLASH_PATTERN.matcher(path).matches()) { 475 return true; 476 } 477 478 // Check for standalone .. that isn't at the beginning 479 if ("..".equals(path)) { 480 return true; 481 } 482 483 // Additional security: check for any .. that appears as a complete path segment 484 // This catches cases where .. remains as directory navigation after normalization 485 // but excludes .. that appears embedded within filenames (fixing false positives) 486 if (path.contains("..")) { 487 // Check if .. appears as a complete path segment (separated by slashes) 488 String[] segments = PATH_SEPARATOR_PATTERN.split(path); 489 for (String segment : segments) { 490 if ("..".equals(segment)) { 491 return true; 492 } 493 } 494 } 495 496 return false; 497 } 498 499 /** 500 * Checks if the normalized path attempts to escape the application root. 501 * 502 * <p>This check identifies paths that would navigate outside the intended 503 * directory structure after normalization.</p> 504 * 505 * @param path The normalized path to check 506 * @return true if path attempts to escape root 507 */ 508 private boolean escapesRoot(String path) { 509 // Check if normalized path tries to escape root 510 return STARTS_WITH_DOTDOT_SLASH_PATTERN.matcher(path).matches() || 511 STARTS_WITH_DOTDOT_BACKSLASH_PATTERN.matcher(path).matches(); 512 } 513 514 /** 515 * Creates a conditional validator that only processes inputs matching the condition. 516 * 517 * @param condition The condition to test before validation 518 * @return A conditional HttpSecurityValidator that applies normalization conditionally 519 */ 520 @Override 521 public HttpSecurityValidator when(Predicate<String> condition) { 522 return input -> { 523 if (input == null || !condition.test(input)) { 524 return Optional.ofNullable(input); 525 } 526 return validate(input); 527 }; 528 } 529 530 531}