/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.icrawl.crawler.urls;

import com.google.common.collect.ImmutableMultimap;
import de.l3s.icrawl.crawler.urls.UrlNormalizer;
import io.mola.galimatias.GalimatiasParseException;
import io.mola.galimatias.NameValue;
import io.mola.galimatias.URL;
import io.mola.galimatias.URLSearchParameters;
import io.mola.galimatias.canonicalize.CombinedCanonicalizer;
import io.mola.galimatias.canonicalize.RFC3986Canonicalizer;
import io.mola.galimatias.canonicalize.StripPartCanonicalizer;
import io.mola.galimatias.canonicalize.URLCanonicalizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link UrlNormalizer} that brings URLs into a canonical textual form.
 *
 * <p>The URL is run through a {@link CombinedCanonicalizer} built from three steps:
 * <ol>
 *   <li>{@link StripParametersCanonicalizer}: drops query parameters whose name matches
 *       {@link #EXCLUDE_PATTERN} and sorts the remaining parameters by name,</li>
 *   <li>{@link StripPartCanonicalizer} configured to strip the fragment,</li>
 *   <li>{@link RFC3986Canonicalizer}.</li>
 * </ol>
 */
public class UrlCanonicalizerNormalizer
implements UrlNormalizer {
    /**
     * Matches parameter names that start with {@code utm_} or that contain {@code sessid} /
     * {@code sessionid} anywhere in the name; matching is case-insensitive.
     */
    private static final Pattern EXCLUDE_PATTERN = Pattern.compile("^utm_|sess(ion)?id", Pattern.CASE_INSENSITIVE);
    private static final Logger logger = LoggerFactory.getLogger(UrlCanonicalizerNormalizer.class);
    private final URLCanonicalizer canonicalizer = new CombinedCanonicalizer(new URLCanonicalizer[]{
            new StripParametersCanonicalizer(EXCLUDE_PATTERN),
            new StripPartCanonicalizer(StripPartCanonicalizer.Part.FRAGMENT),
            new RFC3986Canonicalizer()});

    /**
     * Normalizes the given URL string.
     *
     * @param url the URL to normalize
     * @return the canonicalized URL; {@code null} if {@code url} cannot be parsed; the
     *         unchanged input string if parsing succeeds but canonicalization fails
     */
    @Override
    public String normalize(String url) {
        URL parsedUrl;
        try {
            parsedUrl = URL.parse(url);
        }
        catch (GalimatiasParseException e) {
            // Unparseable input is dropped (signalled by null) rather than passed on.
            logger.trace("Invalid URL '{}', dropping", url, e);
            return null;
        }
        try {
            return this.canonicalizer.canonicalize(parsedUrl).toString();
        }
        catch (GalimatiasParseException e) {
            // Best effort: the URL parsed fine, so hand back the caller's original string.
            logger.debug("Could not canonicalize URL '{}', returning unchanged ", url, e);
            return url;
        }
    }

    /**
     * {@link URLCanonicalizer} that removes query parameters whose name matches a pattern
     * and rewrites the remaining query with parameters sorted by name.
     */
    public static class StripParametersCanonicalizer
    implements URLCanonicalizer {
        private final Pattern excludePattern;

        /**
         * @param excludePattern pattern applied with {@code find()} to every parameter name;
         *                       parameters whose name matches are removed
         */
        public StripParametersCanonicalizer(Pattern excludePattern) {
            this.excludePattern = excludePattern;
        }

        /**
         * Returns a copy of {@code url} whose query has been filtered and sorted.
         *
         * @param url the URL to canonicalize
         * @return the URL with the rewritten query; the query passed to
         *         {@code withQuery} is {@code null} when no parameter remains
         * @throws GalimatiasParseException declared by the interface and propagated from
         *         {@code withQuery}
         */
        public URL canonicalize(URL url) throws GalimatiasParseException {
            return url.withQuery(this.canonicalizeQuery(new URLSearchParameters(url.query())));
        }

        /**
         * Builds the canonical query string: excluded parameters are skipped, the remaining
         * names are sorted in natural {@code String} order, and repeated names keep their
         * values in the order they were encountered.
         *
         * @param searchParameters the parsed parameters of the original query
         * @return the rebuilt query string (without leading {@code ?}), or {@code null} if
         *         no parameter is left
         */
        private String canonicalizeQuery(URLSearchParameters searchParameters) {
            ImmutableMultimap.Builder<String, String> finalParameters = ImmutableMultimap.builder();
            for (NameValue param : searchParameters) {
                if (this.excludePattern.matcher(param.name()).find()) {
                    continue;
                }
                finalParameters.put(param.name(), param.value());
            }
            ImmutableMultimap<String, String> includedParameters = finalParameters.build();
            if (includedParameters.isEmpty()) {
                return null;
            }
            ArrayList<String> sortedKeys = new ArrayList<String>(includedParameters.keySet());
            Collections.sort(sortedKeys);
            // NOTE(review): names and values are appended as returned by NameValue, without
            // re-encoding. If the parser hands back decoded text, values containing '&' or
            // '=' would change meaning here - confirm against galimatias before relying on it.
            StringBuilder sb = new StringBuilder(100);
            for (String key : sortedKeys) {
                for (String value : includedParameters.get(key)) {
                    if (sb.length() > 0) {
                        sb.append('&');
                    }
                    sb.append(key);
                    // A parameter with an empty value is written as the bare name, without '='.
                    if (value.isEmpty()) {
                        continue;
                    }
                    sb.append('=');
                    sb.append(value);
                }
            }
            return sb.toString();
        }
    }
}

