public class HttpCrawler extends AbstractCrawler
AbstractCrawler.CopyIfNullBeanUtilsBean
| Constructor and Description |
|---|
| HttpCrawler(HttpCrawlerConfig crawlerConfig) — Constructor. |
createCrawlDataStore, deleteCacheOrphans, execute, fireCrawlerEvent, getBaseDownloadDir, getCrawlerDownloadDir, getCrawlerEventManager, getId, getImporter, getStreamFactory, handleOrphans, isMaxDocuments, isStopped, processNextReference, processReferences, reprocessCacheOrphans, resumeExecution, startExecution
execute
public HttpCrawler(HttpCrawlerConfig crawlerConfig)
crawlerConfig
- HTTP crawler configuration
public HttpCrawlerConfig getCrawlerConfig()
getCrawlerConfig
in interface ICrawler
getCrawlerConfig
in class AbstractCrawler
public org.apache.http.client.HttpClient getHttpClient()
public ISitemapResolver getSitemapResolver()
public void stop(IJobStatus jobStatus, JobSuite suite)
stop
in interface IJob
stop
in class AbstractCrawler
protected void prepareExecution(JobStatusUpdater statusUpdater, JobSuite suite, ICrawlDataStore crawlDataStore, boolean resume)
prepareExecution
in class AbstractCrawler
protected void executeQueuePipeline(ICrawlData crawlData, ICrawlDataStore crawlDataStore)
executeQueuePipeline
in class AbstractCrawler
protected ImporterDocument wrapDocument(ICrawlData crawlData, ImporterDocument document)
wrapDocument
in class AbstractCrawler
protected void initCrawlData(ICrawlData crawlData, ICrawlData cachedCrawlData, ImporterDocument document)
initCrawlData
in class AbstractCrawler
protected ImporterResponse executeImporterPipeline(ImporterPipelineContext importerContext)
executeImporterPipeline
in class AbstractCrawler
protected BaseCrawlData createEmbeddedCrawlData(String embeddedReference, ICrawlData parentCrawlData)
createEmbeddedCrawlData
in class AbstractCrawler
protected void executeCommitterPipeline(ICrawler crawler, ImporterDocument doc, ICrawlDataStore crawlDataStore, BaseCrawlData crawlData, BaseCrawlData cachedCrawlData)
executeCommitterPipeline
in class AbstractCrawler
protected void beforeFinalizeDocumentProcessing(BaseCrawlData crawlData, ICrawlDataStore store, ImporterDocument doc, ICrawlData cachedData)
beforeFinalizeDocumentProcessing
in class AbstractCrawler
protected void markReferenceVariationsAsProcessed(BaseCrawlData crawlData, ICrawlDataStore crawlDataStore)
markReferenceVariationsAsProcessed
in class AbstractCrawler
protected void cleanupExecution(JobStatusUpdater statusUpdater, JobSuite suite, ICrawlDataStore refStore)
cleanupExecution
in class AbstractCrawler
Copyright © 2009–2021 Norconex Inc. All rights reserved.