001/* 002 * Copyright © 2025 CUI-OpenSource-Software (info@cuioss.de) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package de.cuioss.http.security.validation; 017 018import de.cuioss.http.security.config.SecurityConfiguration; 019import de.cuioss.http.security.core.HttpSecurityValidator; 020import de.cuioss.http.security.core.UrlSecurityFailureType; 021import de.cuioss.http.security.core.ValidationType; 022import de.cuioss.http.security.exceptions.UrlSecurityException; 023import org.jspecify.annotations.Nullable; 024 025import java.net.URLDecoder; 026import java.nio.charset.StandardCharsets; 027import java.text.Normalizer; 028import java.util.Optional; 029import java.util.function.Predicate; 030import java.util.regex.Pattern; 031 032/** 033 * HTTP protocol-layer decoding validation stage with security checks. 034 * 035 * <p>This stage performs URL decoding with security validation to detect and prevent 036 * HTTP protocol-layer encoding attacks such as double encoding and overlong UTF-8 encoding. 037 * <strong>Architectural Scope:</strong> Limited to HTTP/URL protocol encodings only.</p> 038 * 039 * <ol> 040 * <li><strong>Double Encoding Detection</strong> - Identifies %25XX patterns indicating double encoding</li> 041 * <li><strong>Overlong UTF-8 Detection</strong> - Blocks malformed UTF-8 encoding attacks</li> 042 * <li><strong>URL Decoding</strong> - Performs standard URL percent-decoding</li> 043 * <li><strong>Unicode Normalization</strong> - Optionally normalizes Unicode and detects changes</li> 044 * </ol> 045 * 046 * <h3>Design Principles</h3> 047 * <ul> 048 * <li><strong>Immutability</strong> - All fields are final, stage instances are immutable</li> 049 * <li><strong>Thread Safety</strong> - Safe for concurrent use across multiple threads</li> 050 * <li><strong>Performance</strong> - Uses pre-compiled patterns and efficient operations</li> 051 * <li><strong>Security First</strong> - Detects attacks before potentially dangerous decoding</li> 052 * </ul> 053 * 054 * <h3>Security Validations</h3> 055 * <ul> 056 * <li><strong>Double Encoding</strong> - Detects %25XX patterns that could bypass filters</li> 057 * <li><strong>Overlong UTF-8</strong> - Blocks malformed UTF-8 encoding attacks</li> 058 * <li><strong>Invalid Encoding</strong> - Catches malformed percent-encoded sequences</li> 059 * <li><strong>Unicode Normalization Attacks</strong> - Detects normalization changes that could alter meaning</li> 060 * </ul> 061 * 062 * <h3>Usage Examples</h3> 063 * <pre> 064 * // Create decoding stage for URL paths 065 * SecurityConfiguration config = SecurityConfiguration.defaults(); 066 * DecodingStage pathDecoder = new DecodingStage(config, ValidationType.URL_PATH); 067 * 068 * // Validate and decode input 069 * try { 070 * String decoded = pathDecoder.validate("/api/users%2F123"); 071 * // Returns: "/api/users/123" 072 * } catch (UrlSecurityException e) { 073 * // Handle security violation 074 * logger.warn("Encoding attack detected: {}", e.getFailureType()); 075 * } 076 * 077 * // Double encoding detection 078 * try { 079 * pathDecoder.validate("/admin%252F../users"); // %25 = encoded % 080 * // Throws UrlSecurityException with DOUBLE_ENCODING failure type 081 * } catch (UrlSecurityException e) { 082 * // Attack blocked before decoding 083 * } 084 * </pre> 085 * 086 * <h3>Performance Characteristics</h3> 087 * <ul> 088 * <li>O(n) time complexity where n is input length</li> 089 * <li>Single pass through input for double encoding detection</li> 090 * <li>Minimal memory allocation - reuses pattern instances</li> 091 * <li>Early termination on security violations</li> 092 * </ul> 093 * <p> 094 * Implements: Task V1 from HTTP verification specification 095 * 096 * @param config Security configuration controlling validation behavior. 097 * @param validationType Type of validation being performed (URL_PATH, PARAMETER_NAME, etc.). 098 * @see HttpSecurityValidator 099 * @see SecurityConfiguration 100 * @see ValidationType 101 * @since 1.0 102 */ 103public record DecodingStage(SecurityConfiguration config, 104ValidationType validationType) implements HttpSecurityValidator { 105 106 /** 107 * Pre-compiled pattern for detecting double encoding patterns. 108 * Matches %25 followed by two hexadecimal digits, indicating a percent sign 109 * that was encoded as %25 and then encoded again. 110 */ 111 private static final Pattern DOUBLE_ENCODING_PATTERN = Pattern.compile("%25[0-9a-fA-F]{2}"); 112 113 /** 114 * Pre-compiled pattern for detecting UTF-8 overlong encoding attacks. 115 * Matches UTF-8 overlong encodings commonly used to bypass security filters. 116 * Includes common overlong encodings for ASCII characters and path separators. 117 */ 118 @SuppressWarnings({"java:S5785", "java:S5855"}) private static final Pattern UTF8_OVERLONG_PATTERN = Pattern.compile( 119 """ 120 %c[0-1][0-9a-f]|\ 121 %e0%[89][0-9a-f]%[89a-f]|\ 122 %f0%80%[89][0-9a-f]%[89a-f]|\ 123 %c0%[a-f][0-9a-f]|%c1%[0-9a-f]|\ 124 %c0%ae|%c0%af|%c1%9c|%c1%81""", 125 Pattern.CASE_INSENSITIVE 126 ); 127 128 /** 129 * Validates input through HTTP protocol-layer decoding with security checks. 130 * 131 * <p><strong>Architectural Boundary:</strong> This stage operates strictly at the HTTP protocol layer, 132 * handling URL-specific encoding schemes. Application-layer encodings (HTML entities, JS escapes) 133 * are handled by higher application layers where they have proper context.</p> 134 * 135 * <p>HTTP Protocol Processing stages:</p> 136 * <ol> 137 * <li>Double encoding detection - fails fast if %25XX patterns found</li> 138 * <li>UTF-8 overlong encoding detection - blocks malformed UTF-8 attack patterns</li> 139 * <li>URL decoding - converts percent-encoded sequences to characters</li> 140 * <li>Unicode normalization - optionally normalizes and detects changes</li> 141 * </ol> 142 * 143 * @param value The input string to validate and decode 144 * @return The validated and decoded string wrapped in Optional, or Optional.empty() if input was null 145 * @throws UrlSecurityException if any security violations are detected: 146 * <ul> 147 * <li>DOUBLE_ENCODING - if double encoding patterns are found</li> 148 * <li>INVALID_ENCODING - if URL decoding fails due to malformed input</li> 149 * <li>UNICODE_NORMALIZATION_CHANGED - if Unicode normalization changes the string</li> 150 * </ul> 151 */ 152 @Override 153 public Optional<String> validate(@Nullable String value) throws UrlSecurityException { 154 if (value == null) { 155 return Optional.empty(); 156 } 157 158 // Step 1: Detect double encoding before decoding 159 if (!config.allowDoubleEncoding() && DOUBLE_ENCODING_PATTERN.matcher(value).find()) { 160 throw UrlSecurityException.builder() 161 .failureType(UrlSecurityFailureType.DOUBLE_ENCODING) 162 .validationType(validationType) 163 .originalInput(value) 164 .detail("Double encoding pattern %25XX detected in input") 165 .build(); 166 } 167 168 // Step 1.5: Detect UTF-8 overlong encoding attacks (always blocked - security critical) 169 if (UTF8_OVERLONG_PATTERN.matcher(value).find()) { 170 throw UrlSecurityException.builder() 171 .failureType(UrlSecurityFailureType.INVALID_ENCODING) 172 .validationType(validationType) 173 .originalInput(value) 174 .detail("UTF-8 overlong encoding attack detected") 175 .build(); 176 } 177 178 // Step 2: URL decode (HTTP protocol-layer appropriate) 179 String decoded; 180 try { 181 decoded = URLDecoder.decode(value, StandardCharsets.UTF_8); 182 } catch (IllegalArgumentException e) { 183 throw UrlSecurityException.builder() 184 .failureType(UrlSecurityFailureType.INVALID_ENCODING) 185 .validationType(validationType) 186 .originalInput(value) 187 .detail("URL decoding failed: " + e.getMessage()) 188 .cause(e) 189 .build(); 190 } 191 192 // Step 3: Unicode normalization with change detection 193 if (config.normalizeUnicode()) { 194 String normalized = Normalizer.normalize(decoded, Normalizer.Form.NFC); 195 if (!decoded.equals(normalized)) { 196 // Normalization changed the string - potential attack 197 throw UrlSecurityException.builder() 198 .failureType(UrlSecurityFailureType.UNICODE_NORMALIZATION_CHANGED) 199 .validationType(validationType) 200 .originalInput(value) 201 .sanitizedInput(normalized) 202 .detail("Unicode normalization changed string content") 203 .build(); 204 } 205 decoded = normalized; 206 } 207 208 return Optional.of(decoded); 209 } 210 211 /** 212 * Creates a conditional validator that only processes non-null, non-empty inputs. 213 * 214 * @return A conditional HttpSecurityValidator that skips null/empty inputs 215 */ 216 @Override 217 public HttpSecurityValidator when(Predicate<String> condition) { 218 return input -> { 219 if (input == null || !condition.test(input)) { 220 return Optional.ofNullable(input); 221 } 222 return validate(input); 223 }; 224 } 225 226 227}