public class HttpCrawler extends Crawler
HttpCrawlerConfig
Crawler.ReferenceProcessStatus
| Constructor and Description |
|---|
| HttpCrawler(HttpCrawlerConfig crawlerConfig, HttpCollector collector) Constructor. |
| Modifier and Type | Method and Description |
|---|---|
| protected void | afterCrawlerExecution() |
| protected void | beforeCrawlerExecution(boolean resume) |
| protected void | beforeFinalizeDocumentProcessing(CrawlDoc doc) |
| protected CrawlDocInfo | createChildDocInfo(String embeddedReference, CrawlDocInfo parentCrawlData) |
| protected void | executeCommitterPipeline(Crawler crawler, CrawlDoc doc) |
| protected ImporterResponse | executeImporterPipeline(ImporterPipelineContext importerContext) |
| protected void | executeQueuePipeline(CrawlDocInfo crawlRef) |
| protected Class<? extends CrawlDocInfo> | getCrawlDocInfoType() |
| HttpCrawlerConfig | getCrawlerConfig() |
| IDataStore<String> | getDedupDocumentStore() |
| IDataStore<String> | getDedupMetadataStore() |
| HttpFetchClient | getHttpFetchClient() |
| ISitemapResolver | getSitemapResolver() |
| protected void | initCrawlDoc(CrawlDoc doc) |
| protected boolean | isQueueInitialized() |
| protected void | markReferenceVariationsAsProcessed(CrawlDocInfo crawlRef) |
clean, deleteCacheOrphans, destroyCrawler, doExecute, exportDataStore, getCollector, getCommitterService, getDataStoreEngine, getDocInfoService, getDownloadDir, getEventManager, getId, getImporter, getMonitor, getStreamFactory, getTempDir, getWorkDir, handleOrphans, importDataStore, initCrawler, isMaxDocuments, isStopped, processNextReference, processReferences, reprocessCacheOrphans, start, stop, toString
public HttpCrawler(HttpCrawlerConfig crawlerConfig, HttpCollector collector)
crawlerConfig
- HTTP crawler configuration
collector
- HTTP collector this crawler belongs to
public HttpCrawlerConfig getCrawlerConfig()
getCrawlerConfig
in class Crawler
public HttpFetchClient getHttpFetchClient()
public ISitemapResolver getSitemapResolver()
public IDataStore<String> getDedupMetadataStore()
public IDataStore<String> getDedupDocumentStore()
protected boolean isQueueInitialized()
isQueueInitialized
in class Crawler
protected void beforeCrawlerExecution(boolean resume)
beforeCrawlerExecution
in class Crawler
protected void afterCrawlerExecution()
afterCrawlerExecution
in class Crawler
protected void executeQueuePipeline(CrawlDocInfo crawlRef)
executeQueuePipeline
in class Crawler
protected Class<? extends CrawlDocInfo> getCrawlDocInfoType()
getCrawlDocInfoType
in class Crawler
protected void initCrawlDoc(CrawlDoc doc)
initCrawlDoc
in class Crawler
protected ImporterResponse executeImporterPipeline(ImporterPipelineContext importerContext)
executeImporterPipeline
in class Crawler
protected CrawlDocInfo createChildDocInfo(String embeddedReference, CrawlDocInfo parentCrawlData)
createChildDocInfo
in class Crawler
protected void executeCommitterPipeline(Crawler crawler, CrawlDoc doc)
executeCommitterPipeline
in class Crawler
protected void beforeFinalizeDocumentProcessing(CrawlDoc doc)
beforeFinalizeDocumentProcessing
in class Crawler
protected void markReferenceVariationsAsProcessed(CrawlDocInfo crawlRef)
markReferenceVariationsAsProcessed
in class Crawler
Copyright © 2009–2023 Norconex Inc. All rights reserved.