001/*
002 * Copyright © 2025 CUI-OpenSource-Software (info@cuioss.de)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package de.cuioss.http.security.validation;
017
018import de.cuioss.http.security.config.SecurityConfiguration;
019import de.cuioss.http.security.core.HttpSecurityValidator;
020import de.cuioss.http.security.core.UrlSecurityFailureType;
021import de.cuioss.http.security.core.ValidationType;
022import de.cuioss.http.security.exceptions.UrlSecurityException;
023import org.jspecify.annotations.Nullable;
024
025import java.net.URLDecoder;
026import java.nio.charset.StandardCharsets;
027import java.text.Normalizer;
028import java.util.Optional;
029import java.util.function.Predicate;
030import java.util.regex.Pattern;
031
032/**
033 * HTTP protocol-layer decoding validation stage with security checks.
034 *
035 * <p>This stage performs URL decoding with security validation to detect and prevent
036 * HTTP protocol-layer encoding attacks such as double encoding and overlong UTF-8 encoding.
037 * <strong>Architectural Scope:</strong> Limited to HTTP/URL protocol encodings only.</p>
038 *
039 * <ol>
040 *   <li><strong>Double Encoding Detection</strong> - Identifies %25XX patterns indicating double encoding</li>
041 *   <li><strong>Overlong UTF-8 Detection</strong> - Blocks malformed UTF-8 encoding attacks</li>
042 *   <li><strong>URL Decoding</strong> - Performs standard URL percent-decoding</li>
043 *   <li><strong>Unicode Normalization</strong> - Optionally normalizes Unicode and detects changes</li>
044 * </ol>
045 *
046 * <h3>Design Principles</h3>
047 * <ul>
048 *   <li><strong>Immutability</strong> - All fields are final, stage instances are immutable</li>
049 *   <li><strong>Thread Safety</strong> - Safe for concurrent use across multiple threads</li>
050 *   <li><strong>Performance</strong> - Uses pre-compiled patterns and efficient operations</li>
051 *   <li><strong>Security First</strong> - Detects attacks before potentially dangerous decoding</li>
052 * </ul>
053 *
054 * <h3>Security Validations</h3>
055 * <ul>
056 *   <li><strong>Double Encoding</strong> - Detects %25XX patterns that could bypass filters</li>
057 *   <li><strong>Overlong UTF-8</strong> - Blocks malformed UTF-8 encoding attacks</li>
058 *   <li><strong>Invalid Encoding</strong> - Catches malformed percent-encoded sequences</li>
059 *   <li><strong>Unicode Normalization Attacks</strong> - Detects normalization changes that could alter meaning</li>
060 * </ul>
061 *
062 * <h3>Usage Examples</h3>
063 * <pre>
064 * // Create decoding stage for URL paths
065 * SecurityConfiguration config = SecurityConfiguration.defaults();
066 * DecodingStage pathDecoder = new DecodingStage(config, ValidationType.URL_PATH);
067 *
068 * // Validate and decode input
069 * try {
070 *     String decoded = pathDecoder.validate("/api/users%2F123");
071 *     // Returns: "/api/users/123"
072 * } catch (UrlSecurityException e) {
073 *     // Handle security violation
074 *     logger.warn("Encoding attack detected: {}", e.getFailureType());
075 * }
076 *
077 * // Double encoding detection
078 * try {
079 *     pathDecoder.validate("/admin%252F../users"); // %25 = encoded %
080 *     // Throws UrlSecurityException with DOUBLE_ENCODING failure type
081 * } catch (UrlSecurityException e) {
082 *     // Attack blocked before decoding
083 * }
084 * </pre>
085 *
086 * <h3>Performance Characteristics</h3>
087 * <ul>
088 *   <li>O(n) time complexity where n is input length</li>
089 *   <li>Single pass through input for double encoding detection</li>
090 *   <li>Minimal memory allocation - reuses pattern instances</li>
091 *   <li>Early termination on security violations</li>
092 * </ul>
093 * <p>
094 * Implements: Task V1 from HTTP verification specification
095 *
096 * @param config         Security configuration controlling validation behavior.
097 * @param validationType Type of validation being performed (URL_PATH, PARAMETER_NAME, etc.).
098 * @see HttpSecurityValidator
099 * @see SecurityConfiguration
100 * @see ValidationType
101 * @since 1.0
102 */
103public record DecodingStage(SecurityConfiguration config,
104ValidationType validationType) implements HttpSecurityValidator {
105
106    /**
107     * Pre-compiled pattern for detecting double encoding patterns.
108     * Matches %25 followed by two hexadecimal digits, indicating a percent sign
109     * that was encoded as %25 and then encoded again.
110     */
111    private static final Pattern DOUBLE_ENCODING_PATTERN = Pattern.compile("%25[0-9a-fA-F]{2}");
112
113    /**
114     * Pre-compiled pattern for detecting UTF-8 overlong encoding attacks.
115     * Matches UTF-8 overlong encodings commonly used to bypass security filters.
116     * Includes common overlong encodings for ASCII characters and path separators.
117     */
118    @SuppressWarnings({"java:S5785", "java:S5855"}) private static final Pattern UTF8_OVERLONG_PATTERN = Pattern.compile(
119            """
120            %c[0-1][0-9a-f]|\
121            %e0%[89][0-9a-f]%[89a-f]|\
122            %f0%80%[89][0-9a-f]%[89a-f]|\
123            %c0%[a-f][0-9a-f]|%c1%[0-9a-f]|\
124            %c0%ae|%c0%af|%c1%9c|%c1%81""",
125            Pattern.CASE_INSENSITIVE
126    );
127
128    /**
129     * Validates input through HTTP protocol-layer decoding with security checks.
130     *
131     * <p><strong>Architectural Boundary:</strong> This stage operates strictly at the HTTP protocol layer,
132     * handling URL-specific encoding schemes. Application-layer encodings (HTML entities, JS escapes)
133     * are handled by higher application layers where they have proper context.</p>
134     *
135     * <p>HTTP Protocol Processing stages:</p>
136     * <ol>
137     *   <li>Double encoding detection - fails fast if %25XX patterns found</li>
138     *   <li>UTF-8 overlong encoding detection - blocks malformed UTF-8 attack patterns</li>
139     *   <li>URL decoding - converts percent-encoded sequences to characters</li>
140     *   <li>Unicode normalization - optionally normalizes and detects changes</li>
141     * </ol>
142     *
143     * @param value The input string to validate and decode
144     * @return The validated and decoded string wrapped in Optional, or Optional.empty() if input was null
145     * @throws UrlSecurityException if any security violations are detected:
146     *                              <ul>
147     *                                <li>DOUBLE_ENCODING - if double encoding patterns are found</li>
148     *                                <li>INVALID_ENCODING - if URL decoding fails due to malformed input</li>
149     *                                <li>UNICODE_NORMALIZATION_CHANGED - if Unicode normalization changes the string</li>
150     *                              </ul>
151     */
152    @Override
153    public Optional<String> validate(@Nullable String value) throws UrlSecurityException {
154        if (value == null) {
155            return Optional.empty();
156        }
157
158        // Step 1: Detect double encoding before decoding
159        if (!config.allowDoubleEncoding() && DOUBLE_ENCODING_PATTERN.matcher(value).find()) {
160            throw UrlSecurityException.builder()
161                    .failureType(UrlSecurityFailureType.DOUBLE_ENCODING)
162                    .validationType(validationType)
163                    .originalInput(value)
164                    .detail("Double encoding pattern %25XX detected in input")
165                    .build();
166        }
167
168        // Step 1.5: Detect UTF-8 overlong encoding attacks (always blocked - security critical)
169        if (UTF8_OVERLONG_PATTERN.matcher(value).find()) {
170            throw UrlSecurityException.builder()
171                    .failureType(UrlSecurityFailureType.INVALID_ENCODING)
172                    .validationType(validationType)
173                    .originalInput(value)
174                    .detail("UTF-8 overlong encoding attack detected")
175                    .build();
176        }
177
178        // Step 2: URL decode (HTTP protocol-layer appropriate)
179        String decoded;
180        try {
181            decoded = URLDecoder.decode(value, StandardCharsets.UTF_8);
182        } catch (IllegalArgumentException e) {
183            throw UrlSecurityException.builder()
184                    .failureType(UrlSecurityFailureType.INVALID_ENCODING)
185                    .validationType(validationType)
186                    .originalInput(value)
187                    .detail("URL decoding failed: " + e.getMessage())
188                    .cause(e)
189                    .build();
190        }
191
192        // Step 3: Unicode normalization with change detection
193        if (config.normalizeUnicode()) {
194            String normalized = Normalizer.normalize(decoded, Normalizer.Form.NFC);
195            if (!decoded.equals(normalized)) {
196                // Normalization changed the string - potential attack
197                throw UrlSecurityException.builder()
198                        .failureType(UrlSecurityFailureType.UNICODE_NORMALIZATION_CHANGED)
199                        .validationType(validationType)
200                        .originalInput(value)
201                        .sanitizedInput(normalized)
202                        .detail("Unicode normalization changed string content")
203                        .build();
204            }
205            decoded = normalized;
206        }
207
208        return Optional.of(decoded);
209    }
210
211    /**
212     * Creates a conditional validator that only processes non-null, non-empty inputs.
213     *
214     * @return A conditional HttpSecurityValidator that skips null/empty inputs
215     */
216    @Override
217    public HttpSecurityValidator when(Predicate<String> condition) {
218        return input -> {
219            if (input == null || !condition.test(input)) {
220                return Optional.ofNullable(input);
221            }
222            return validate(input);
223        };
224    }
225
226
227}