/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.icrawl.crawler;

import com.codahale.metrics.CsvReporter;
import com.codahale.metrics.Metric;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.ScheduledReporter;
import com.codahale.metrics.jvm.ThreadStatesGaugeSet;
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import de.l3s.icrawl.crawler.ArchiveCrawlSpecification;
import de.l3s.icrawl.crawler.Crawler;
import de.l3s.icrawl.crawler.analysis.ResourceAnalyser;
import de.l3s.icrawl.crawler.analysis.ResourceAnalyserFactory;
import de.l3s.icrawl.crawler.io.CsvStorer;
import de.l3s.icrawl.crawler.io.ResultStorer;
import de.l3s.icrawl.crawler.io.ZipFileStorer;
import de.l3s.icrawl.crawler.scheduling.NumberOfUrlsStoppingCriterion;
import de.l3s.icrawl.crawler.ui.UiConfig;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.data.jpa.JpaRepositoriesAutoConfiguration;
import org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration;
import org.springframework.boot.context.embedded.EmbeddedServletContainerInitializedEvent;
import org.springframework.context.ApplicationListener;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Profile;
import org.springframework.context.support.PropertySourcesPlaceholderConfigurer;

/**
 * Spring configuration and command-line entry point of the archive crawler.
 *
 * <p>The class wires the {@link Crawler} together with its collaborators (Hadoop
 * configuration, metrics registry and reporter, resource analyser factory and the
 * profile-dependent {@link StorerConfig}) and offers a {@link #main(String[])} method
 * that runs one crawl described by a specification file.
 */
@org.springframework.context.annotation.Configuration
@EnableAutoConfiguration(exclude={HibernateJpaAutoConfiguration.class, JpaRepositoriesAutoConfiguration.class})
@Import(value={UiConfig.class})
public class ArchiveCrawler
implements ApplicationListener<EmbeddedServletContainerInitializedEvent> {
    /** Name of the Spring profile that activates {@link CsvStorerConfig}. */
    public static final String PROFILE_EVALUATION = "evaluation";
    /** Name of the Spring profile that activates {@link ZipFileStorerConfig}. */
    public static final String PROFILE_EXTRACT = "extract";
    public static final String IDF_DICTIONARY_DE = "dictionary-DE.tsv.gz";

    private static final Logger logger = LoggerFactory.getLogger(ArchiveCrawler.class);

    /** Number of URLs used for the stopping criterion when {@code num_urls} is not given. */
    private static final long DEFAULT_NUM_URLS = 10000L;
    /** Value used for {@code snapshotsToAnalyze} when it is not given on the command line. */
    private static final int DEFAULT_SNAPSHOTS_TO_ANALYZE = 10;

    /** Value of the {@code cdxPath} property; handed to the {@link Crawler} as index path. */
    @Value(value="${cdxPath}")
    String indexPath;
    /** Value of the {@code warcRoot} property; handed to the {@link Crawler} as data path. */
    @Value(value="${warcRoot}")
    String dataPath;
    /** Value of the {@code numThreads} property (default 10); handed to the {@link Crawler}. */
    @Value(value="${numThreads:10}")
    int numThreads;
    /** Value of the {@code timeRelevanceThreshold} property (default 0.25). */
    @Value(value="${timeRelevanceThreshold:0.25}")
    float timeRelevanceThreshold;
    /** Value of the {@code docSimilarityWeight} property (default 0.5). */
    @Value(value="${docSimilarityWeight:0.5}")
    float docSimilarityWeight;
    /** Port reported by the embedded servlet container; stays 0 until the event arrives. */
    private int serverPort;
    /**
     * Provider of the result storer factory. Implementations in this file are bound to the
     * {@link #PROFILE_EXTRACT} and {@link #PROFILE_EVALUATION} profiles.
     */
    @Inject
    StorerConfig storerConfig;
    /** Value of the {@code logdir} property; directory the CSV metrics reporter writes to. */
    @Value(value="${logdir}")
    File logDir;

    /**
     * Registers the placeholder configurer used to resolve the {@code ${...}} expressions
     * in the {@link Value} annotations of this class.
     */
    @Bean
    static PropertySourcesPlaceholderConfigurer propertySourcesPlaceholderConfigurer() {
        return new PropertySourcesPlaceholderConfigurer();
    }

    /**
     * Creates the resource analyser factory from the metrics registry and the configured
     * time relevance threshold and document similarity weight.
     */
    @Bean
    ResourceAnalyserFactory raf() {
        return new ResourceAnalyser.Factory(metrics(), timeRelevanceThreshold, docSimilarityWeight);
    }

    /** Creates the metrics registry and registers the thread state gauges as "threads". */
    @Bean
    MetricRegistry metrics() {
        MetricRegistry registry = new MetricRegistry();
        registry.register("threads", new ThreadStatesGaugeSet());
        return registry;
    }

    /**
     * Creates and starts a CSV reporter for {@link #metrics()} that writes into
     * {@link #logDir} once per minute, with durations in milliseconds and rates per second.
     */
    @Bean
    ScheduledReporter reporter() {
        CsvReporter csvReporter = CsvReporter.forRegistry(metrics())
                .formatFor(Locale.ROOT)
                .convertDurationsTo(TimeUnit.MILLISECONDS)
                .convertRatesTo(TimeUnit.SECONDS)
                .build(logDir);
        csvReporter.start(1L, TimeUnit.MINUTES);
        logger.info("Started logging metrics every minute");
        return csvReporter;
    }

    /** Creates the Hadoop configuration: an HBase configuration layered on a YARN one. */
    @Bean
    Configuration conf() {
        return HBaseConfiguration.create(new YarnConfiguration());
    }

    /**
     * Creates the crawler from the configured paths, thread count and collaborating beans.
     *
     * @throws IOException if the storer factory or the crawler cannot be created
     */
    @Bean
    Crawler crawler() throws IOException {
        Configuration hadoopConf = conf();
        ResultStorer.Factory storers = storerConfig.storerFactory(conf());
        return new Crawler(hadoopConf, indexPath, dataPath, raf(), storers, metrics(), numThreads);
    }

    /** Exposes the Jackson module for {@code java.time} types as a bean. */
    @Bean
    Module jsr310Module() {
        return new JavaTimeModule();
    }

    /**
     * Remembers the port of the embedded servlet container once it has been initialized.
     *
     * @param event event carrying the initialized container
     */
    @Override
    public void onApplicationEvent(EmbeddedServletContainerInitializedEvent event) {
        this.serverPort = event.getEmbeddedServletContainer().getPort();
    }

    /**
     * @return the port recorded by {@link #onApplicationEvent}, or 0 if no such event was
     *         received yet
     */
    public int getServerPort() {
        return this.serverPort;
    }

    /**
     * Runs a single crawl from the command line.
     *
     * <p>Arguments: {@code specification [num_urls [weightingMethod [snapshotsToAnalyze]]]}.
     * Missing optional arguments default to {@value #DEFAULT_NUM_URLS} URLs, the
     * {@code CONTENT} weighting method and {@value #DEFAULT_SNAPSHOTS_TO_ANALYZE} snapshots.
     * Without any argument a usage message is printed and the JVM exits with status 1.
     *
     * @param args command-line arguments as described above; they are also passed on to
     *             the Spring application
     * @throws IOException if reading the specification or crawling fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("Usage: java " + ArchiveCrawler.class.getName()
                    + " specification [num_urls [weightingMethod [snapshotsToAnalyze]]]");
            System.exit(1);
        }
        SpringApplication app = new SpringApplication(ArchiveCrawler.class);
        // The command-line run does not start the web environment.
        app.setWebEnvironment(false);
        ConfigurableApplicationContext context = app.run(args);
        Crawler crawler = context.getBean(Crawler.class);
        try {
            ArchiveCrawlSpecification spec = ArchiveCrawlSpecification.readFile(new File(args[0]));

            long numUrls = DEFAULT_NUM_URLS;
            if (args.length >= 2) {
                numUrls = Long.parseLong(args[1]);
            }
            ResourceAnalyser.WeightingMethod method = ResourceAnalyser.WeightingMethod.CONTENT;
            if (args.length >= 3) {
                method = ResourceAnalyser.WeightingMethod.valueOf(args[2]);
            }
            int snapshotsToAnalyze = DEFAULT_SNAPSHOTS_TO_ANALYZE;
            if (args.length >= 4) {
                snapshotsToAnalyze = Integer.parseInt(args[3]);
            }

            // -Double.MAX_VALUE is the most negative finite double.
            // NOTE(review): presumably means "no lower bound" for this parameter - confirm
            // against Crawler#crawlContinuously.
            crawler.crawlContinuously(spec, new NumberOfUrlsStoppingCriterion(numUrls), method,
                    -Double.MAX_VALUE, snapshotsToAnalyze);
            // Write one final metrics snapshot in addition to the periodic reports.
            context.getBean(ScheduledReporter.class).report();
        } finally {
            // Shut the crawler down even when the crawl or the argument parsing failed.
            crawler.shutdown();
        }
    }

    /**
     * Storer configuration for the {@link ArchiveCrawler#PROFILE_EXTRACT} profile: results
     * are written through {@link ZipFileStorer}s into {@code <outputDirectory>/<name>.zip}.
     */
    @org.springframework.context.annotation.Configuration
    @Profile(PROFILE_EXTRACT)
    public static class ZipFileStorerConfig
    implements StorerConfig {
        /** Value of the {@code maxUrls} property; passed to every {@link ZipFileStorer}. */
        @Value(value="${maxUrls}")
        public int maxUrls;
        /** Value of the {@code outputDirectory} property; base directory of the ZIP files. */
        @Value(value="${outputDirectory}")
        public String outputDirectory;

        /**
         * Creates the output directory and returns a factory producing one ZIP storer per
         * name.
         *
         * <p>NOTE(review): unlike {@link CsvStorerConfig#storerFactory} this method is not
         * annotated with {@code @Bean}; left unchanged - confirm whether that is intended.
         *
         * @param conf Hadoop configuration used to obtain the file system
         * @return factory that creates {@code <outputDirectory>/<name>.zip}
         * @throws IOException if the file system cannot be obtained or the directory
         *         cannot be created
         */
        @Override
        public ResultStorer.Factory storerFactory(Configuration conf) throws IOException {
            Path baseDirectory = new Path(this.outputDirectory);
            FileSystem fs = FileSystem.get(conf);
            fs.mkdirs(baseDirectory);
            return name -> {
                logger.info("Creating new ZipFileStorer for '{}'", name);
                // Second argument 'true': overwrite a file that already exists.
                OutputStream zipStream = fs.create(new Path(baseDirectory, name + ".zip"), true);
                return new ZipFileStorer(zipStream, this.maxUrls);
            };
        }
    }

    /**
     * Storer configuration for the {@link ArchiveCrawler#PROFILE_EVALUATION} profile:
     * results are written through {@link CsvStorer}s into
     * {@code <outputDirectory>/<name>.csv}.
     */
    @org.springframework.context.annotation.Configuration
    @Profile(PROFILE_EVALUATION)
    public static class CsvStorerConfig
    implements StorerConfig {
        /** Value of the {@code outputDirectory} property; base directory of the CSV files. */
        @Value(value="${outputDirectory}")
        public String outputDirectory;

        /**
         * Ensures the output directory exists and returns a factory producing one CSV
         * storer per name.
         *
         * @param conf Hadoop configuration used to obtain the file system and handed to
         *             every {@link CsvStorer}
         * @return factory that creates storers for {@code <outputDirectory>/<name>.csv}
         * @throws IOException if the file system cannot be obtained or the directory
         *         cannot be checked or created
         */
        @Override
        @Bean
        public ResultStorer.Factory storerFactory(Configuration conf) throws IOException {
            Path baseDirectory = new Path(this.outputDirectory);
            FileSystem fs = FileSystem.get(conf);
            if (!fs.exists(baseDirectory)) {
                // Only a newly created directory gets the wide-open permission string.
                fs.mkdirs(baseDirectory, FsPermission.valueOf("-rwxrwxrwx"));
            }
            return name -> new CsvStorer(conf, new Path(baseDirectory, name + ".csv"));
        }
    }

    /** Supplies the {@link ResultStorer.Factory} used by the crawler. */
    public static interface StorerConfig {
        /**
         * @param var1 Hadoop configuration to create the factory for
         * @return factory for result storers
         * @throws IOException if the factory cannot be set up
         */
        public ResultStorer.Factory storerFactory(Configuration var1) throws IOException;
    }
}

