001/*
002 * Copyright © 2025 CUI-OpenSource-Software (info@cuioss.de)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package de.cuioss.http.security.validation;
017
import de.cuioss.http.security.config.SecurityConfiguration;
import de.cuioss.http.security.config.SecurityDefaults;
import de.cuioss.http.security.core.HttpSecurityValidator;
import de.cuioss.http.security.core.UrlSecurityFailureType;
import de.cuioss.http.security.core.ValidationType;
import de.cuioss.http.security.exceptions.UrlSecurityException;
import org.jspecify.annotations.Nullable;

import java.util.Locale;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
029
030/**
031 * Pattern matching validation stage for detecting malicious attack patterns.
032 *
033 * <p>This stage performs comprehensive pattern-based security validation to detect
034 * known attack signatures, injection attempts, and suspicious content patterns.
035 * The stage analyzes input against multiple security pattern databases:</p>
036 *
037 * <ol>
038 *   <li><strong>Path Traversal Patterns</strong> - Detects directory traversal attempts</li>
039 *   <li><strong>Suspicious Protocol Patterns</strong> - Identifies protocol violations</li>
040 *   <li><strong>Suspicious Path Patterns</strong> - Detects access to sensitive system locations</li>
041 *   <li><strong>Parameter Attack Patterns</strong> - Identifies malicious parameter usage</li>
042 * </ol>
043 *
044 * <h3>Design Principles</h3>
045 * <ul>
046 *   <li><strong>Signature-Based Detection</strong> - Uses known attack patterns from OWASP and CVE databases</li>
047 *   <li><strong>Configurable Sensitivity</strong> - Behavior controlled by failOnSuspiciousPatterns setting</li>
048 *   <li><strong>Performance Optimized</strong> - Uses pre-compiled patterns and efficient string operations</li>
049 *   <li><strong>Context Aware</strong> - Different pattern sets applied based on validation type</li>
050 * </ul>
051 *
052 * <h3>Security Validations</h3>
053 * <ul>
054 *   <li><strong>Path Traversal</strong> - ../,..\\, and encoded variants</li>
055 *   <li><strong>Protocol Violations</strong> - Suspicious URI schemes and protocol handlers</li>
056 *   <li><strong>File Access</strong> - Attempts to access sensitive system files</li>
057 *   <li><strong>Parameter Pollution</strong> - Suspicious parameter names and patterns</li>
058 * </ul>
059 *
060 * <h3>Usage Examples</h3>
061 * <pre>
062 * // Create pattern matching stage
063 * SecurityConfiguration config = SecurityConfiguration.defaults();
064 * PatternMatchingStage matcher = new PatternMatchingStage(config, ValidationType.URL_PATH);
065 *
066 * // Detect path traversal attack
067 * try {
068 *     matcher.validate("/api/users/../../../etc/passwd");
069 *     // Throws UrlSecurityException with PATH_TRAVERSAL_DETECTED
070 * } catch (UrlSecurityException e) {
071 *     logger.warn("Path traversal blocked: {}", e.getDetail());
072 * }
073 *
074 * // Path traversal detection
075 * try {
076 *     matcher.validate("../../../etc/passwd");
077 * } catch (UrlSecurityException e) {
078 *     logger.warn("Path traversal blocked: {}", e.getDetail());
079 * }
080 *
081 * // Configurable sensitivity
082 * SecurityConfiguration strict = SecurityConfiguration.strict(); // failOnSuspiciousPatterns=true
083 * PatternMatchingStage strictMatcher = new PatternMatchingStage(strict, ValidationType.PARAMETER_VALUE);
084 *
085 * // Legitimate content that might trigger in strict mode
086 * try {
087 *     strictMatcher.validate("SELECT name FROM contacts WHERE id = 123");
088 *     // May throw if configured to fail on suspicious patterns
089 * } catch (UrlSecurityException e) {
090 *     // Handle based on security policy
091 * }
092 * </pre>
093 *
094 * <h3>Performance Characteristics</h3>
095 * <ul>
096 *   <li>O(n*m) time complexity where n = input length, m = number of patterns</li>
097 *   <li>Early termination on first pattern match</li>
098 *   <li>Optimized pattern order based on common attack frequency</li>
099 *   <li>Case-insensitive matching for broader attack detection</li>
100 * </ul>
101 *
102 * <h3>Configuration Dependencies</h3>
103 * <ul>
104 *   <li><strong>failOnSuspiciousPatterns</strong> - Controls whether to fail on pattern matches</li>
105 *   <li><strong>caseSensitiveComparison</strong> - Affects pattern matching behavior</li>
106 *   <li><strong>logSecurityViolations</strong> - Controls violation logging</li>
107 * </ul>
108 * <p>
109 * Implements: Task V3 from HTTP verification specification
110 *
111 * @param config         Security configuration controlling validation behavior.
112 * @param validationType Type of validation being performed (URL_PATH, PARAMETER_NAME, etc.).
113 * @see HttpSecurityValidator
114 * @see SecurityConfiguration
115 * @see SecurityDefaults
116 * @see ValidationType
117 * @since 1.0
118 */
119public record PatternMatchingStage(SecurityConfiguration config,
120ValidationType validationType) implements HttpSecurityValidator {
121
122    /**
123     * Pre-compiled regex pattern for detecting encoded path traversal sequences.
124     * Matches various URL-encoded representations of ../ and ..\ patterns including
125     * double-encoded, UTF-8 overlong, and mixed encoding attempts.
126     * ReDoS-safe: Uses only atomic patterns without nested or consecutive quantifiers.
127     */
128    @SuppressWarnings({"java:S5869", "java:S5867", "java:S5855"}) private static final Pattern ENCODED_TRAVERSAL_PATTERN = Pattern.compile(
129            """
130            %2e%2e(%2f|%5c|/|\\\\)|\
131            \\.%2e(%2f|%5c|/|\\\\)|%2e\\.(%2f|%5c|/|\\\\)|\
132            %252e%252e(%252f|%255c)|\
133            \\.\\.(%252f|%255c)|\
134            %c0%ae%c0%ae(%c0%af|%c1%9c|/|\\\\)|%c1%9c%c1%9c|%c1%8s|\
135            %c0%ae%c0%ae%c0%af|%c0%ae%c0%af|%c1%9c|\
136            %2e%2e//|%2e%2e\\\\\\\\""",
137            Pattern.CASE_INSENSITIVE
138    );
139
140    /**
141     * Pre-compiled regex pattern for detecting multiple dots followed by path separators.
142     * ReDoS-safe: Uses specific atomic patterns without quantifiers that could cause backtracking.
143     */
144    @SuppressWarnings({"java:S5869", "java:S6035", "RegExpSingleCharAlternation"}) private static final Pattern DOT_SEPARATOR_PATTERN = Pattern.compile(
145            """
146            \\.\\.(/|\\\\)|\\.\\.%2f|\\.\\.%5c|\
147            \\.\\.\\.(/|\\\\)|\\.\\.\\.%2f|\\.\\.\\.%5c|\
148            \\.\\.\\.\\.(/|\\\\)|\\.\\.\\.\\.%2f|\\.\\.\\.\\.%5c|\
149            \\.\\.\\.\\.\\.(/|\\\\)|\\.\\.\\.\\.\\.%2f|\\.\\.\\.\\.\\.%5c""",
150            Pattern.CASE_INSENSITIVE
151    );
152
153
154    // XSS script pattern removed - application layer responsibility.
155    // Application layers have proper context for HTML/JS escaping and validation.
156
157
158    /**
159     * Validates input against comprehensive attack pattern databases.
160     *
161     * <p>Processing stages:</p>
162     * <ol>
163     *   <li>Input validation - handles null/empty inputs</li>
164     *   <li>Context-sensitive pattern selection - chooses appropriate patterns for validation type</li>
165     *   <li>Pattern matching - tests against known attack signatures</li>
166     *   <li>Policy enforcement - applies configured response to pattern matches</li>
167     * </ol>
168     *
169     * @param value The input string to validate against attack patterns
170     * @return The original input wrapped in Optional if validation passes, or Optional.empty() if input was null
171     * @throws UrlSecurityException if malicious patterns are detected:
172     *                              <ul>
173     *                                <li>PATH_TRAVERSAL_DETECTED - if path traversal patterns found</li>
174     *                                <!-- XSS detection removed - application layer responsibility -->
175     *                                <li>SUSPICIOUS_PATTERN_DETECTED - if suspicious patterns found and policy requires failure</li>
176     *                              </ul>
177     */
178    @Override
179    @SuppressWarnings("java:S3516")
180    public Optional<String> validate(@Nullable String value) throws UrlSecurityException {
181        if (value == null) {
182            return Optional.empty();
183        }
184        if (value.isEmpty()) {
185            return Optional.of(value);
186        }
187
188        // Prepare value for case-insensitive matching if needed
189        String testValue = config.caseSensitiveComparison() ? value : value.toLowerCase();
190
191        // Step 1: Check for path traversal patterns (applies to paths and parameters)
192        if (validationType == ValidationType.URL_PATH ||
193                validationType == ValidationType.PARAMETER_VALUE ||
194                validationType == ValidationType.PARAMETER_NAME) {
195
196            checkPathTraversalPatterns(value, testValue);
197        }
198
199        // XSS pattern checking removed - application layer responsibility.
200
201        // Step 3: Check for suspicious system paths (paths and parameters)
202        if (validationType == ValidationType.URL_PATH || validationType == ValidationType.PARAMETER_VALUE) {
203            checkSuspiciousPathPatterns(value, testValue);
204        }
205
206        // Step 4: Check for suspicious parameter names (parameter names only)
207        if (validationType == ValidationType.PARAMETER_NAME) {
208            checkSuspiciousParameterNames(value, testValue);
209        }
210
211        // Validation passed - return original value
212        // Note: Always returning input value is correct for validator contract
213        return Optional.of(value);
214    }
215
216    /**
217     * Checks input for path traversal attack patterns.
218     *
219     * <p><strong>Security Critical:</strong> Path traversal patterns are ALWAYS blocked
220     * regardless of the failOnSuspiciousPatterns configuration, as they represent
221     * direct security threats, not merely suspicious behavior.</p>
222     *
223     * @param originalValue The original input value
224     * @param testValue     The value prepared for testing (case-normalized if needed)
225     * @throws UrlSecurityException if path traversal patterns are detected
226     */
227    private void checkPathTraversalPatterns(String originalValue, String testValue) {
228        // Check simple string patterns - ALWAYS fail on path traversal (security critical)
229        for (String pattern : SecurityDefaults.PATH_TRAVERSAL_PATTERNS) {
230            String checkPattern = config.caseSensitiveComparison() ? pattern : pattern.toLowerCase();
231            if (testValue.contains(checkPattern)) {
232                throw UrlSecurityException.builder()
233                        .failureType(UrlSecurityFailureType.PATH_TRAVERSAL_DETECTED)
234                        .validationType(validationType)
235                        .originalInput(originalValue)
236                        .detail("Path traversal pattern detected: " + pattern)
237                        .build();
238            }
239        }
240
241        // Check encoded patterns using regex - ALWAYS fail on path traversal (security critical)
242        if (ENCODED_TRAVERSAL_PATTERN.matcher(originalValue).find()) {
243            throw UrlSecurityException.builder()
244                    .failureType(UrlSecurityFailureType.PATH_TRAVERSAL_DETECTED)
245                    .validationType(validationType)
246                    .originalInput(originalValue)
247                    .detail("Encoded path traversal pattern detected via regex")
248                    .build();
249        }
250
251        // Additional check: Look for any sequence of dots followed by path separators
252        // This catches edge cases like multiple dots or mixed separators
253        // ReDoS-safe: Using contains() with a compiled pattern instead of matches() with .*
254        if (DOT_SEPARATOR_PATTERN.matcher(originalValue).find()) {
255            throw UrlSecurityException.builder()
256                    .failureType(UrlSecurityFailureType.PATH_TRAVERSAL_DETECTED)
257                    .validationType(validationType)
258                    .originalInput(originalValue)
259                    .detail("Path traversal pattern detected: multiple dots with separators")
260                    .build();
261        }
262
263    }
264
265    // XSS pattern checking removed - application layer responsibility.
266    // Application layers have proper context for HTML/JS escaping and validation.
267
268    /**
269     * Checks input for suspicious system path patterns.
270     *
271     * @param originalValue The original input value
272     * @param testValue     The value prepared for testing (case-normalized if needed)
273     * @throws UrlSecurityException if suspicious patterns are found and policy requires failure
274     */
275    private void checkSuspiciousPathPatterns(String originalValue, String testValue) {
276        for (String pattern : SecurityDefaults.SUSPICIOUS_PATH_PATTERNS) {
277            String checkPattern = config.caseSensitiveComparison() ? pattern : pattern.toLowerCase();
278            if (testValue.contains(checkPattern) && config.failOnSuspiciousPatterns()) {
279                throw UrlSecurityException.builder()
280                        .failureType(UrlSecurityFailureType.SUSPICIOUS_PATTERN_DETECTED)
281                        .validationType(validationType)
282                        .originalInput(originalValue)
283                        .detail("Suspicious path pattern detected: " + pattern)
284                        .build();
285            }
286            // If not configured to fail, continue validation but could log here
287
288        }
289    }
290
291    /**
292     * Checks parameter names for suspicious patterns commonly used in attacks.
293     *
294     * @param originalValue The original input value
295     * @param testValue     The value prepared for testing (case-normalized if needed)
296     * @throws UrlSecurityException if suspicious parameter names are found and policy requires failure
297     */
298    private void checkSuspiciousParameterNames(String originalValue, String testValue) {
299        for (String suspiciousName : SecurityDefaults.SUSPICIOUS_PARAMETER_NAMES) {
300            String checkName = config.caseSensitiveComparison() ? suspiciousName : suspiciousName.toLowerCase();
301            if ((testValue.equals(checkName) || testValue.contains(checkName)) && config.failOnSuspiciousPatterns()) {
302                throw UrlSecurityException.builder()
303                        .failureType(UrlSecurityFailureType.SUSPICIOUS_PARAMETER_NAME)
304                        .validationType(validationType)
305                        .originalInput(originalValue)
306                        .detail("Suspicious parameter name detected: " + suspiciousName)
307                        .build();
308            }
309            // If not configured to fail, continue validation
310
311        }
312    }
313
314    /**
315     * Creates a conditional validator that only processes inputs matching the condition.
316     *
317     * @param condition The condition to test before validation
318     * @return A conditional HttpSecurityValidator that applies pattern matching conditionally
319     */
320    @Override
321    public HttpSecurityValidator when(Predicate<String> condition) {
322        return input -> {
323            if (input == null || !condition.test(input)) {
324                return Optional.ofNullable(input);
325            }
326            return validate(input);
327        };
328    }
329
330
331}