package de.pfabulist.loracle.text;

import com.esotericsoftware.minlog.Log;
import de.pfabulist.frex.Frex;

import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static de.pfabulist.frex.Frex.fullWord;
import static de.pfabulist.frex.Frex.or;
import static de.pfabulist.frex.Frex.txt;
import static de.pfabulist.loracle.text.Normalizer.UrlVeriables.relevant;
import static de.pfabulist.loracle.text.Normalizer.WordVersionVariables.word;
import static de.pfabulist.roast.NonnullCheck._nn;

/**
 * Copyright (c) 2006 - 2017, Stephan Pfab
 * SPDX-License-Identifier: BSD-2-Clause
 */

/**
 * Static text normalization helpers: reducing a name to its significant tokens
 * ({@link #reduce(String)}), flattening multi-line text ({@link #norm(String)}) and
 * deriving file names ({@link #toFilename(String)}).
 *
 * All regular expressions are compiled once and kept in static fields.
 */
public class Normalizer {

    // Token separators used by reduce(): whitespace plus a fixed set of punctuation characters.
    // Held as a compiled Pattern so that reduce() does not recompile it on every call.
    static private final Pattern WHITESPACE = Frex.or( Frex.whitespace(),
                                                       Frex.txt( ',' ),
                                                       Frex.txt( '-' ),
                                                       Frex.txt( '_' ),
                                                       Frex.txt( '!' ),
                                                       Frex.txt( '"' ),
                                                       Frex.txt( '\'' ),
                                                       Frex.txt( '/' ),
                                                       Frex.txt( '(' ),
                                                       Frex.txt( '*' ),
                                                       Frex.txt( ')' ) ).
            buildPattern();

    // Filler words (including common misspellings) that reduce() drops from its result.
    static private final Pattern maybe = or( fullWord( "License" ),
                                             fullWord( "Licence" ),
                                             fullWord( "Lisense" ),
                                             fullWord( "Source" ),
                                             fullWord( "Code" ),
                                             fullWord( "The" ),
                                             fullWord( "Version" ),
                                             fullWord( "Vesion" ),
                                             fullWord( "Software" ),
                                             fullWord( "General" ),
                                             fullWord( "Agreement" ),
                                             fullWord( "Free" ),
                                             fullWord( "Open" ),
                                             fullWord( "Public" ),
                                             fullWord( "Copyright" ),
                                             fullWord( "Like" ), // todo hmm
                                             fullWord( "v" ) ).buildCaseInsensitivePattern();

    // Runs of two or more whitespace characters; reduce() collapses them to a single blank.
    static public final Pattern spaces = Frex.whitespace().atLeast( 2 ).buildPattern();
    // A token of the form "v" followed by digits, dots or commas.
    static private final Pattern vVersion = txt( "v" ).then( or( Frex.number(), txt( '.' ), txt( ',' ) ).oneOrMore() ).buildCaseInsensitivePattern();
    // A token made only of digits and dots.
    static private final Pattern version = or( Frex.number(), txt( '.' ) ).oneOrMore().buildCaseInsensitivePattern();

    // Names of the capture groups used by wordVversion and wordVersion.
    enum WordVersionVariables {
        word,
        version
    }

    // Letters, then "v", then a version: captured as groups "word" and "version".
    static private final Pattern wordVversion = Frex.alpha().oneOrMore().var( word ).
            then( Frex.txt( "v" ) ).
            then( or( Frex.number(), txt( '.' ) ).oneOrMore().var( WordVersionVariables.version ) ).
            buildCaseInsensitivePattern();

    // Letters directly followed by a version: captured as groups "word" and "version".
    static private final Pattern wordVersion = Frex.alpha().oneOrMore().var( word ).
            then( or( Frex.number(), txt( '.' ) ).oneOrMore().var( WordVersionVariables.version ) ).
            buildCaseInsensitivePattern();

    // NOTE: the name is misspelled, but it is referenced by a static import of this file, so it is kept.
    enum UrlVeriables {
        relevant
    }

    /**
     * Removes every trailing {@code ".0"} from the given string, e.g. {@code "2.0.0"}
     * becomes {@code "2"} and {@code "1.1"} stays {@code "1.1"}.
     *
     * @param num the text to shorten, must not be null
     * @return {@code num} without any trailing {@code ".0"} suffixes
     */
    static String normalizeNumber( String num ) {
        String ret = num;
        while( ret.endsWith( ".0" ) ) {
            ret = ret.substring( 0, ret.length() - 2 );
        }

        return ret;
    }

    /**
     * Appends {@code num} to {@code sb} after normalizing it with {@link #normalizeNumber(String)},
     * so that both helpers agree on the canonical form of a version.
     *
     * @param sb  the builder to append to
     * @param num the text to normalize and append
     */
    static void addNumber( StringBuilder sb, String num ) {
        sb.append( normalizeNumber( num ) );
    }

    /**
     * Reduces a name to its significant tokens.
     *
     * The input is lower-cased and split at whitespace and punctuation. Filler words are
     * dropped, a leading "v" is removed from version tokens, a word glued to a version is
     * separated from it by a blank, and trailing ".0" parts of versions are cut. The remaining
     * tokens are joined with single blanks.
     *
     * If nothing but filler words or a bare version remains, the lower-cased input with
     * collapsed whitespace is returned instead.
     *
     * @param in the name to reduce, must not be null
     * @return the reduced name
     */
    public static String reduce( String in ) {
        String lower = in.toLowerCase( Locale.US );
        StringBuilder sb = new StringBuilder();

        for( String token : WHITESPACE.split( lower ) ) {
            // dropped tokens still contribute a blank; runs of blanks are collapsed below
            sb.append( reduceToken( token ) ).append( " " );
        }

        String ret = sb.toString().trim();

        if( !ret.isEmpty() && !version.matcher( ret ).matches() ) {
            return spaces.matcher( ret ).replaceAll( " " );
        }

        Log.debug( "license name composed of fill words only: " + in );
        return spaces.matcher( lower ).replaceAll( " " );
    }

    // Maps one lower-cased token of reduce() to its normalized text; "" means the token is dropped.
    private static String reduceToken( String token ) {
        if( token.isEmpty() || maybe.matcher( token ).matches() ) {
            return "";
        }

        if( vVersion.matcher( token ).matches() ) {
            // todo what v2, v,3
            return token.equals( "v." ) ? "" : normalizeNumber( token.substring( 1 ) );
        }

        Matcher withV = wordVversion.matcher( token );
        if( withV.matches() ) {
            return withV.group( "word" ) + " " + normalizeNumber( _nn( withV.group( "version" ) ) );
        }

        Matcher withoutV = wordVersion.matcher( token );
        if( withoutV.matches() ) {
            return withoutV.group( "word" ) + " " + normalizeNumber( _nn( withoutV.group( "version" ) ) );
        }

        return normalizeNumber( token );
    }

    /**
     * Lower-cases the input, replaces every comma by a blank and trims the result.
     *
     * @param in the text to clean up, must not be null
     * @return the cleaned text
     */
    public static String trim( String in ) {
        return in.toLowerCase( Locale.US ).replace( ',', ' ' ).trim();
    }


    // Disabled: refers to a urlPattern that is not defined in this class.
//    public static Optional<String> normalizeUrl( String url ) {
//        Matcher matcher = urlPattern.matcher( url );
//        if( !matcher.matches() ) {
//            return Optional.empty();
//        }
//
//        return Optional.of( _nn( matcher.group( "relevant" ) ).toLowerCase( Locale.US ));
//
//    }

    // One or more whitespace, CR or LF characters; norm() replaces each run by a single blank.
    private final static Pattern htmlws = Frex.or( Frex.whitespace(), Frex.txt( '\r' ), Frex.txt( '\n' ) ).oneOrMore().buildPattern();

    /**
     * Flattens a multi-line text into one line: every line is trimmed, a leading comment
     * marker ("*", "//" or "!") is removed, the lines are joined with blanks and every run
     * of whitespace is replaced by a single blank.
     *
     * @param txt the text to flatten, must not be null
     * @return the flattened text
     */
    public static String norm( String txt ) {
        String joined = Arrays.stream( txt.split( "\n" ) ).
                map( Normalizer::stripCommentPrefix ).
                collect( Collectors.joining( " " ) );

        return htmlws.matcher( joined ).replaceAll( " " );
    }

    // Trims a line and removes one leading "*", "//" or "!" marker together with the blanks after it.
    private static String stripCommentPrefix( String line ) {
        String trimmed = line.trim();

        if( trimmed.startsWith( "*" ) || trimmed.startsWith( "!" ) ) {
            return trimmed.substring( 1 ).trim();
        }

        if( trimmed.startsWith( "//" ) ) {
            return trimmed.substring( 2 ).trim();
        }

        return trimmed;
    }

    // Characters that toFilename() replaces: : / * " < > ? \ and blank.
    private static final Pattern urlspecial = txt( ':' ).or( txt( '/' )).or( txt( '*' )).or( txt( '"')).or( txt( '<')).or( txt('>' )).or( txt( '?')).or( txt( '\\') ).or( txt(' ') ).buildPattern();

    /**
     * Replaces each of the characters {@code : / * " < > ? \} and blank by an underscore.
     *
     * @param str the text to convert, must not be null
     * @return the text with the listed characters replaced by {@code '_'}
     */
    public static String toFilename( String str ) {
        return urlspecial.matcher( str ).replaceAll( "_" );
    }

    // Fixed list of short names; declared final so the reference cannot be reassigned.
    public static final List<String> tooSimpleLongNames = Arrays.asList( "map", "par", "free", "foundation", "fsf",
                                                                 "initial developer", "wide", "attribution",
            "only", "attribution only", "hp", "microsoft", "json", "closed", "government", "doc",
        "directory", "jetty", "sequence", "fork", "open", "regexp", "berkeley" );
//    license, licence ?


}
