/*
 * Copyright 2023 the original author or authors.
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * https://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.cuioss.tools.net;

import java.net.IDN;
import java.util.function.UnaryOperator;
import java.util.regex.Pattern;

import lombok.NonNull;
import lombok.experimental.UtilityClass;

/**
 * <p>
 * Utility class to handle IDN email addresses: only the domain part of an
 * address is converted between its Unicode and its ASCII-compatible form, the
 * local part and an optional display name are left untouched.
 * </p>
 * See
 * <ul>
 * <li><a href=
 * "https://docs.oracle.com/javase/tutorial/i18n/network/idn.html">https://docs.oracle.com/javase/tutorial/i18n/network/idn.html</a></li>
 * <li><a href=
 * "https://de.wikipedia.org/wiki/Internationalisierter_Domainname">https://de.wikipedia.org/wiki/Internationalisierter_Domainname</a></li>
 * <li><a href=
 * "https://en.wikipedia.org/wiki/Internationalized_domain_name">https://en.wikipedia.org/wiki/Internationalized_domain_name</a></li>
 * </ul>
 *
 * @author Matthias Walliczek
 */
@UtilityClass
public class IDNInternetAddress {

    /**
     * Matches {@code prefix<local@domain>suffix}. Groups: 1 = text before
     * {@code <}, 2 = local part, 3 = domain, 4 = text after {@code >}.
     * NOTE(review): every group is bounded to 64 characters, so an address with a
     * longer domain does not match and is returned without conversion - confirm
     * that this is intended.
     */
    private static final Pattern ADDRESS_PATTERN_WITH_DISPLAY_NAME = Pattern
            .compile("(.{0,64})<(.{1,64})@(.{1,64})>(.{0,64})");

    /** Matches a bare {@code local@domain}. Groups: 1 = local part, 2 = domain. */
    private static final Pattern ADDRESS_PATTERN = Pattern.compile("(.{1,64})@(.{1,64})");

    /**
     * Encodes the domain part of an email address with {@link IDN#toASCII(String)}.
     *
     * @param completeAddress the address to encode in RFC822 format, must not be
     *                        {@code null}
     * @return the encoded address in RFC822 format
     * @throws IllegalArgumentException if {@link IDN#toASCII(String)} rejects the
     *                                  domain part
     */
    public static String encode(@NonNull final String completeAddress) {
        return encode(completeAddress, UnaryOperator.identity());
    }

    /**
     * Encodes the given address and sanitizes the elements with the provided
     * sanitizer. It takes care of the special elements like {@code <>} and
     * {@code @} by not passing them to the sanitizer. The domain part is converted
     * with {@link IDN#toASCII(String)} before it is sanitized.
     *
     * @param completeAddress the address to encode in RFC822 format, must not be
     *                        {@code null}
     * @param sanitizer       applied to every element of the address, must not be
     *                        {@code null}
     * @return the sanitized and encoded address. If the input is not recognized as
     *         an address, the sanitized input is returned.
     * @throws IllegalArgumentException if {@link IDN#toASCII(String)} rejects the
     *                                  domain part
     */
    public static String encode(@NonNull final String completeAddress,
            @NonNull final UnaryOperator<String> sanitizer) {
        return convert(completeAddress, sanitizer, IDN::toASCII);
    }

    /**
     * Decodes the domain part of an email address with
     * {@link IDN#toUnicode(String)}.
     *
     * @param completeAddress the address to decode in RFC822 format, must not be
     *                        {@code null}
     * @return the decoded address in RFC822 format
     */
    public static String decode(@NonNull final String completeAddress) {
        return decode(completeAddress, UnaryOperator.identity());
    }

    /**
     * Decodes the given address and sanitizes the elements with the provided
     * sanitizer. It takes care of the special elements like {@code <>} and
     * {@code @} by not passing them to the sanitizer. The domain part is converted
     * with {@link IDN#toUnicode(String)} before it is sanitized.
     *
     * @param completeAddress the address to decode in RFC822 format, must not be
     *                        {@code null}
     * @param sanitizer       applied to every element of the address, must not be
     *                        {@code null}
     * @return the sanitized and decoded address. If the input is not recognized as
     *         an address, the sanitized input is returned.
     */
    public static String decode(@NonNull final String completeAddress,
            @NonNull final UnaryOperator<String> sanitizer) {
        return convert(completeAddress, sanitizer, IDN::toUnicode);
    }

    /**
     * Splits the address into its elements, converts the domain and reassembles
     * the sanitized elements around the untouched structural characters.
     */
    private static String convert(final String completeAddress, final UnaryOperator<String> sanitizer,
            final UnaryOperator<String> domainConverter) {
        // The display-name form is tried first: the bare pattern would otherwise
        // swallow the angle brackets into the local part / domain.
        final var withDisplayName = ADDRESS_PATTERN_WITH_DISPLAY_NAME.matcher(completeAddress);
        if (withDisplayName.matches()) {
            return sanitizer.apply(withDisplayName.group(1)) + "<" + sanitizer.apply(withDisplayName.group(2)) + "@"
                    + sanitizer.apply(domainConverter.apply(withDisplayName.group(3))) + ">"
                    + sanitizer.apply(withDisplayName.group(4));
        }
        final var bareAddress = ADDRESS_PATTERN.matcher(completeAddress);
        if (bareAddress.matches()) {
            return sanitizer.apply(bareAddress.group(1)) + "@"
                    + sanitizer.apply(domainConverter.apply(bareAddress.group(2)));
        }
        // Not recognizable as an address: sanitize the input as a whole.
        return sanitizer.apply(completeAddress);
    }
}