/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.icrawl.crawler.tools;

import com.google.common.escape.CharEscaperBuilder;
import com.google.common.escape.Escaper;
import com.google.common.io.Files;
import com.google.common.net.UrlEscapers;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;

/**
 * Command-line tool that converts a tab-separated topics file into one
 * regular-expression query per topic.
 *
 * <p>The first line of the input file is treated as a header and skipped. Every
 * following line is split on tabs into at most {@value #COLUMN_COUNT} columns;
 * the first column is the topic code and the last column is a comma-separated
 * list of keywords. For each topic a line {@code code<TAB>query} is written to
 * standard output, where {@code query} is an alternation of several spelling
 * variants of each keyword (see {@link #addVariants(String, Set)}).
 *
 * <p>NOTE(review): keywords are inserted into the query without quoting regular
 * expression metacharacters, so a keyword containing e.g. {@code (} or
 * {@code |} changes the structure of the generated expression. This matches the
 * original behavior and is left unchanged; confirm whether the input can
 * contain such characters.
 */
public class QueryKeywordsCreator {
    /** Maximum number of tab-separated columns a line is split into. */
    private static final int COLUMN_COUNT = 8;

    /** Index of the column holding the topic code. */
    private static final int CODE_COLUMN = 0;

    /** Index of the column holding the comma-separated keyword list. */
    private static final int KEYWORDS_COLUMN = 7;

    /** Regex fragment substituted for whitespace between the words of a keyword. */
    private static final String WORD_GAP = ".?";

    /** Transliterates German umlauts and sharp s to ASCII and replaces each space by {@link #WORD_GAP}. */
    private static final Escaper GERMAN_ESCAPER = new CharEscaperBuilder()
            .addEscape(' ', WORD_GAP)
            .addEscape('\u00e4', "ae")
            .addEscape('\u00f6', "oe")
            .addEscape('\u00fc', "ue")
            .addEscape('\u00df', "ss")
            .toEscaper();

    /**
     * Reads the topics file named by {@code args[0]} (UTF-8) and prints one
     * query line per topic to standard output.
     *
     * <p>Blank lines are ignored. Lines with fewer than {@value #COLUMN_COUNT}
     * columns are reported on standard error and skipped instead of aborting
     * the whole run.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the topics file
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.out.println("Usage: java " + QueryKeywordsCreator.class.getName() + " topicsFile.tsv");
            System.exit(1);
        }
        try (BufferedReader reader = Files.newReader(new File(args[0]), StandardCharsets.UTF_8)) {
            // The first line is the column header, not a topic.
            reader.readLine();
            int lineNumber = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] parts = line.split("\t", COLUMN_COUNT);
                if (parts.length < COLUMN_COUNT) {
                    // Previously this raised ArrayIndexOutOfBoundsException on parts[7].
                    System.err.format("Skipping line %d: expected %d tab-separated columns but found %d%n",
                            lineNumber, COLUMN_COUNT, parts.length);
                    continue;
                }
                System.out.format("%s\t%s%n", parts[CODE_COLUMN], buildQuery(parts[KEYWORDS_COLUMN]));
            }
        }
    }

    /**
     * Builds the query for one topic from its comma-separated keyword list.
     *
     * @param keywordList comma-separated keywords as found in the keywords column
     * @return the single variant if there is exactly one, otherwise all distinct
     *         variants joined as {@code (a)|(b)|...}; the empty string if the
     *         list contains no non-empty keyword
     */
    private static String buildQuery(String keywordList) {
        // Insertion-ordered so that the generated query is reproducible between runs.
        Set<String> alternatives = new LinkedHashSet<>();
        for (String keyword : keywordList.split(",\\s*")) {
            String lcKeyword = keyword.trim().toLowerCase(Locale.GERMAN);
            // An empty alternative "()" would match every input, so drop empty keywords.
            if (!lcKeyword.isEmpty()) {
                addVariants(lcKeyword, alternatives);
            }
        }
        if (alternatives.isEmpty()) {
            return "";
        }
        if (alternatives.size() == 1) {
            return alternatives.iterator().next();
        }
        return alternatives.stream().collect(Collectors.joining(")|(", "(", ")"));
    }

    /**
     * Adds the spelling variants of one lower-cased keyword to {@code target}:
     * the keyword itself, its German ASCII transliteration, its accent-stripped
     * form and its URL-path-escaped form. In every variant the gap between
     * words is replaced by {@link #WORD_GAP}.
     *
     * @param lcKeyword non-empty, lower-cased keyword
     * @param target set receiving the variants
     */
    private static void addVariants(String lcKeyword, Set<String> target) {
        target.add(lcKeyword.replaceAll("\\s+", WORD_GAP));
        target.add(GERMAN_ESCAPER.escape(lcKeyword));
        target.add(StringUtils.stripAccents(lcKeyword).replaceAll("\\s+", WORD_GAP));
        // The path-segment escaper encodes a space as "%20" (not "+"), so that is the
        // sequence that has to become the word gap. A literal '+' passes through the
        // escaper unchanged and is still replaced, as before.
        target.add(UrlEscapers.urlPathSegmentEscaper().escape(lcKeyword)
                .replace("%20", WORD_GAP)
                .replace("+", WORD_GAP));
    }
}

