package de.pseudonymisierung.controlnumbers;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.List;
import java.util.function.Function;

/** Generator for "simple" control numbers, i.e. without encryption by a
 * secret key.
 * 
 * Use {@link #builder()} to get a {@link Builder} instance, which has methods
 * for setting various parameters.
 * 
 * The two base hash methods are {@code synchronized} because each instance
 * reuses one {@link MessageDigest} object per algorithm. */
public class ControlNumberGenerator implements Function<String, ControlNumber> {
	
	/** Default bit length of control numbers. */
	public static final int defaultHashLength = 500;
	/** Default length of n-grams (2 -> bigrams). */
	public static final int defaultNGramLength = 2;
	/** Default number of hash functions. */
	public static final int defaultNHashFunctions = 15;
	/** Default encoding to use when transforming input strings to byte
	 * sequences (necessary for applying hash functions and encryption). */
	public static final Charset defaultEncoding = StandardCharsets.UTF_16;
	
	private final int hashLength;
	private final int nGramLength;
	private final int nHashFunctions;
	/** Encoding used to turn n-grams into bytes before hashing. */
	protected Charset encoding;
	
	/** Number of trailing bytes of each base hash that enter the combined
	 * hash. Depends only on {@link #hashLength}, so it is computed once. */
	private final int nSignBytes;
	
	private final MessageDigest sha1;
	private final MessageDigest md5;
	
	/** Get a builder object with methods to set parameters to non-default
	 * values and to get the resulting instance of
	 * {@link ControlNumberGenerator}.
	 * 
	 * @return An instance of {@link Builder} */
	public static Builder builder() {
		return new Builder();
	}
	
	/** Calculate a control number from an input string.
	 * 
	 * The input is split into n-grams by {@code Utils.getNGrams}. For every
	 * n-gram both base hashes are computed once, and the configured number of
	 * hash functions is derived from them via
	 * {@link #hash(byte[], byte[], int)}. Each resulting position is passed to
	 * {@code set} on the control number.
	 * 
	 * @param input
	 *            The character string to encode.
	 * @return The resulting control number. */
	@Override
	public ControlNumber apply(String input) {
		ControlNumber out = new ControlNumber(hashLength);
		
		List<String> nGrams = Utils.getNGrams(input, this.nGramLength);
		for (String nGram : nGrams) {
			// Compute the base hashes once per n-gram; all derived hash
			// functions reuse them.
			byte[] md5Hash = getMd5Hash(nGram);
			byte[] sha1Hash = getSha1Hash(nGram);
			for (int i = 0; i < nHashFunctions; i++) {
				out.set(hash(md5Hash, sha1Hash, i));
			}
		}
		
		return out;
	}
	
	/** Get MD5 hash of a string. This is one of the base hash functions from
	 * which the desired number of hashes is calculated. The base hashes are
	 * defined as protected methods so that
	 * {@link EncryptedControlNumberGenerator} only needs to override these
	 * using keyed hash functions.
	 * 
	 * @param input
	 *            The character string to encode.
	 * @return The resulting hash. */
	protected synchronized byte[] getMd5Hash(String input) {
		return md5.digest(input.getBytes(encoding));
	}
	
	/** Get SHA1 hash of a string.
	 * 
	 * @param input
	 *            The character string to encode.
	 * @return The resulting hash.
	 * @see #getMd5Hash(String) for more information. */
	protected synchronized byte[] getSha1Hash(String input) {
		return sha1.digest(input.getBytes(encoding));
	}
	
	/** Derive the {@code index}-th hash value from two base hashes.
	 * 
	 * The trailing bytes of each base hash are read as a non-negative integer
	 * {@code h1} resp. {@code h2} (see {@link #trailingBytesValue(byte[])}),
	 * and the result is {@code (h1 + index * h2) mod hashLength}.
	 * 
	 * The computation uses {@code long} arithmetic. The previous
	 * implementation accumulated {@code double} products into an {@code int}
	 * and combined the parts in {@code int}, which could saturate or wrap to
	 * a negative value for large hash lengths or many hash functions. For all
	 * configurations in which that did not happen the result is unchanged.
	 * 
	 * @param baseHash1
	 *            First base hash (e.g. the MD5 digest).
	 * @param baseHash2
	 *            Second base hash (e.g. the SHA1 digest).
	 * @param index
	 *            Index of the derived hash function.
	 * @return The hash value, as computed by {@link Math#floorMod(long, long)}
	 *         with the hash length as modulus.
	 * @throws ArithmeticException
	 *             if the configured hash length is zero. */
	protected int hash(byte[] baseHash1, byte[] baseHash2, int index) {
		long hash1 = trailingBytesValue(baseHash1);
		long hash2 = trailingBytesValue(baseHash2);
		
		// index is widened to long, so the product cannot wrap around in int.
		return (int) Math.floorMod(hash1 + index * hash2, (long) hashLength);
	}
	
	/** Interpret the last {@link #nSignBytes} bytes of a base hash as a
	 * non-negative integer, the last byte being the least significant.
	 * 
	 * @param baseHash
	 *            The base hash to read from.
	 * @return The integer value of the trailing bytes. */
	private long trailingBytesValue(byte[] baseHash) {
		long value = 0;
		for (int byteInd = 0; byteInd < nSignBytes; byteInd++) {
			// byte is signed (-128 - 127), add 128 to get a value in 0 - 255.
			// This mapping is kept as is, so that control numbers remain
			// comparable with previously generated ones.
			int shifted = baseHash[baseHash.length - 1 - byteInd] + 128;
			value += (long) shifted << (8 * byteInd);
		}
		return value;
	}
	
	/** Abstraction of a builder class. This is needed to implement builders for
	 * {@link ControlNumberGenerator} and
	 * {@link EncryptedControlNumberGenerator}, which generate instances of the
	 * respective classes, while keeping common code in one place. (see
	 * https://stackoverflow.com/questions/21086417/builder-pattern-and-inheritance).
	 * 
	 * Implementations should set T to a fixed class.
	 * 
	 * @param <T>
	 *            The type of objects built by an implementation of this
	 *            class. */
	public static abstract class AbstractBuilder<T extends ControlNumberGenerator> {
		protected int hashLength = defaultHashLength;
		protected int nGramLength = defaultNGramLength;
		protected int nHashFunctions = defaultNHashFunctions;
		protected Charset encoding = defaultEncoding;
		
		/** Set the bit length of control numbers.
		 * 
		 * @return This builder. */
		public AbstractBuilder<T> hashLength(int hashLength) {
			this.hashLength = hashLength;
			return this;
		}
		
		/** Set the length of n-grams.
		 * 
		 * @return This builder. */
		public AbstractBuilder<T> nGramLength(int nGramLength) {
			this.nGramLength = nGramLength;
			return this;
		}
		
		/** Set the number of hash functions.
		 * 
		 * @return This builder. */
		public AbstractBuilder<T> nHashFunctions(int nHashFunctions) {
			this.nHashFunctions = nHashFunctions;
			return this;
		}
		
		/** Set the encoding used for transforming strings into byte sequences
		 * before applying hash functions or encryption.
		 * 
		 * @param encoding
		 *            Name of the charset, resolved by
		 *            {@link Charset#forName(String)}.
		 * @return This builder. */
		public AbstractBuilder<T> encoding(String encoding) {
			this.encoding = Charset.forName(encoding);
			return this;
		}
		
		/** Set the encoding used for transforming strings into byte sequences
		 * before applying hash functions or encryption.
		 * 
		 * @return This builder. */
		public AbstractBuilder<T> encoding(Charset encoding) {
			this.encoding = encoding;
			return this;
		}
		
		/** Generate a control number generator based on the configuration of
		 * this builder. */
		public abstract T build();
	}
	
	/** Builder for simple control number generators (i.e. without keyed
	 * hashes). */
	public static class Builder extends AbstractBuilder<ControlNumberGenerator> {
		
		/** Instances are obtained via {@link ControlNumberGenerator#builder()}. */
		private Builder() {
		}
		
		@Override
		public ControlNumberGenerator build() {
			return new ControlNumberGenerator(this);
		}
	}
	
	/** Constructor, called by {@link Builder#build()}.
	 * 
	 * @param builder
	 *            The builder object from which to create the instance. */
	protected ControlNumberGenerator(AbstractBuilder<?> builder) {
		this.hashLength = builder.hashLength;
		this.nGramLength = builder.nGramLength;
		this.nHashFunctions = builder.nHashFunctions;
		this.encoding = builder.encoding;
		
		// Number of base-256 digits derived from the hash length. The formula
		// is kept unchanged so that results stay identical to earlier ones.
		this.nSignBytes = (int) Math.ceil(Math.log(hashLength) / Math.log(256));
		
		this.md5 = newDigest("MD5");
		// "SHA-1" is the standard algorithm name; "SHA1" is only an alias.
		this.sha1 = newDigest("SHA-1");
	}
	
	/** Create a message digest for the given algorithm, converting the
	 * checked exception into an {@link Error}.
	 * 
	 * @param algorithm
	 *            Name of the digest algorithm.
	 * @return A new {@link MessageDigest} instance. */
	private static MessageDigest newDigest(String algorithm) {
		try {
			return MessageDigest.getInstance(algorithm);
		} catch (NoSuchAlgorithmException e) {
			throw new Error("ControlNumberGenerator needs MessageDigest algorithms MD5 and SHA1.", e);
		}
	}
}
