public class Crawler extends Object
| Modifier and Type | Field and Description |
|---|---|
| static String | NUM_URLS |
| static String | RELEVANCE_THRESHOLD |
| static String | SNAPSHOTS_TO_ANALYZE |
| static String | WEIGHTING_METHOD |
| Constructor and Description |
|---|
| Crawler(org.apache.hadoop.conf.Configuration conf, String indexPath, String dataPath, ResourceAnalyserFactory analyserFactory, ResultStorer.Factory storerFactory, com.codahale.metrics.MetricRegistry metrics, int numThreads) |
| Modifier and Type | Method and Description |
|---|---|
| void | crawlContinuously(ArchiveCrawlSpecification spec, StoppingCriterion stoppingCriterion, ResourceAnalyser.WeightingMethod method, double relevanceThreshold, int snapshotsToAnalyze) |
| Optional<ArchiveCrawlSpecification> | getCurrentSpec() |
| void | shutdown() |
| void | stop(boolean interruptRunningFetches) |
public static final String NUM_URLS
public static final String WEIGHTING_METHOD
public static final String RELEVANCE_THRESHOLD
public static final String SNAPSHOTS_TO_ANALYZE
public Crawler(org.apache.hadoop.conf.Configuration conf,
String indexPath,
String dataPath,
ResourceAnalyserFactory analyserFactory,
ResultStorer.Factory storerFactory,
com.codahale.metrics.MetricRegistry metrics,
int numThreads)
throws IOException
Throws: IOException
public void crawlContinuously(ArchiveCrawlSpecification spec, StoppingCriterion stoppingCriterion, ResourceAnalyser.WeightingMethod method, double relevanceThreshold, int snapshotsToAnalyze) throws IOException
Throws: IOException
public void stop(boolean interruptRunningFetches)
public void shutdown()
public Optional<ArchiveCrawlSpecification> getCurrentSpec()
Copyright © 2017. All rights reserved.