001/*
002 * Copyright 2023 the original author or authors.
003 * <p>
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 * <p>
008 * https://www.apache.org/licenses/LICENSE-2.0
009 * <p>
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package de.cuioss.tools.net;
017
018import java.net.IDN;
019import java.util.function.UnaryOperator;
020import java.util.regex.Pattern;
021
022import lombok.NonNull;
023import lombok.experimental.UtilityClass;
024
025/**
026 * <p>
027 * Utility class to handle IDN email addresses.
028 * </p>
029 * See
030 * <ul>
031 * <li><a href=
032 * "https://docs.oracle.com/javase/tutorial/i18n/network/idn.html">https://docs.oracle.com/
033 * javase/tutorial/i18n/network/idn.html</a></li>
034 * <li><a href=
035 * "https://de.wikipedia.org/wiki/Internationalisierter_Domainname">https://de.wikipedia.org
036 * /wiki/Internationalisierter_Domainname</a></li>
037 * <li><a href=
038 * "https://en.wikipedia.org/wiki/Internationalized_domain_name">https://en.wikipedia.org/
039 * wiki/Internationalized_domain_name</a></li>
040 * </ul>
041 *
042 * @author Matthias Walliczek
043 */
044@UtilityClass
045public class IDNInternetAddress {
046
047    private static final Pattern addressPatternWithDisplayName = Pattern
048            .compile("(.{0,64})<(.{1,64})@(.{1,64})>(.{0,64})");
049
050    private static final Pattern addressPattern = Pattern.compile("(.{1,64})@(.{1,64})");
051
052    /**
053     * Encode the domain part of an email address
054     *
055     * @param completeAddress the address to encode in RFC822 format
056     * @return the encoded address in RFC822 format
057     */
058    public static String encode(@NonNull final String completeAddress) {
059        return encode(completeAddress, untrustedHtml -> untrustedHtml);
060    }
061
062    /**
063     * Encodes the given address and sanitizes the elements with the provided
064     * sanitizer. It takes care on the special elements like {@code <>} by not
065     * trying to sanitize them.
066     *
067     * @param completeAddress
068     * @param sanitizer       to be passed as UnaryOperator
069     * @return the sanitized and encoded address.
070     */
071    public static String encode(@NonNull final String completeAddress, UnaryOperator<String> sanitizer) {
072        var matcher = addressPatternWithDisplayName.matcher(completeAddress);
073        if (matcher.matches() && matcher.groupCount() == 4) {
074            return sanitizer.apply(matcher.group(1)) + "<" + sanitizer.apply(matcher.group(2)) + "@"
075                    + sanitizer.apply(IDN.toASCII(matcher.group(3))) + ">" + sanitizer.apply(matcher.group(4));
076        }
077        matcher = addressPattern.matcher(completeAddress);
078        if (matcher.matches() && matcher.groupCount() == 2) {
079            return sanitizer.apply(matcher.group(1)) + "@" + sanitizer.apply(IDN.toASCII(matcher.group(2)));
080        }
081        return sanitizer.apply(completeAddress);
082    }
083
084    /**
085     * Decode the domain part of an email address
086     *
087     * @param completeAddress the address to decode in RFC822 format
088     * @return the decoded address in RFC822 format
089     */
090    public static String decode(@NonNull final String completeAddress) {
091        return decode(completeAddress, untrustedHtml -> untrustedHtml);
092    }
093
094    /**
095     * Decodes the given address and sanitizes the elements with the provided
096     * sanitizer. It takes care on the special elements like <> by not trying to
097     * sanitize them.
098     *
099     * @param completeAddress
100     * @param sanitizer       to be passed as UnaryOperator
101     * @return the sanitized and decoded address.
102     */
103    public static String decode(@NonNull final String completeAddress, UnaryOperator<String> sanitizer) {
104        var matcher = addressPatternWithDisplayName.matcher(completeAddress);
105        if (matcher.matches() && matcher.groupCount() == 4) {
106            return sanitizer.apply(matcher.group(1)) + "<" + sanitizer.apply(matcher.group(2)) + "@"
107                    + sanitizer.apply(IDN.toUnicode(matcher.group(3))) + ">" + sanitizer.apply(matcher.group(4));
108        }
109        matcher = addressPattern.matcher(completeAddress);
110        if (matcher.matches() && matcher.groupCount() == 2) {
111            return sanitizer.apply(matcher.group(1)) + "@" + sanitizer.apply(IDN.toUnicode(matcher.group(2)));
112        }
113        return sanitizer.apply(completeAddress);
114    }
115}