public class HttpCrawler extends AbstractCrawler
AbstractCrawler.CopyIfNullBeanUtilsBean

| Constructor and Description |
|---|
| HttpCrawler(HttpCrawlerConfig crawlerConfig) — Constructor. |
createCrawlDataStore, deleteCacheOrphans, execute, fireCrawlerEvent, getBaseDownloadDir, getCrawlerDownloadDir, getCrawlerEventManager, getId, getImporter, getStreamFactory, handleOrphans, isMaxDocuments, isStopped, processNextReference, processReferences, reprocessCacheOrphans, resumeExecution, startExecution
execute

public HttpCrawler(HttpCrawlerConfig crawlerConfig)
crawlerConfig - HTTP crawler configuration

public HttpCrawlerConfig getCrawlerConfig()
getCrawlerConfig in interface ICrawler
getCrawlerConfig in class AbstractCrawler

public org.apache.http.client.HttpClient getHttpClient()
public ISitemapResolver getSitemapResolver()
public void stop(IJobStatus jobStatus, JobSuite suite)
stop in interface IJob
stop in class AbstractCrawler

protected void prepareExecution(JobStatusUpdater statusUpdater, JobSuite suite, ICrawlDataStore crawlDataStore, boolean resume)
prepareExecution in class AbstractCrawler

protected void executeQueuePipeline(ICrawlData crawlData, ICrawlDataStore crawlDataStore)
executeQueuePipeline in class AbstractCrawler

protected ImporterDocument wrapDocument(ICrawlData crawlData, ImporterDocument document)
wrapDocument in class AbstractCrawler

protected void initCrawlData(ICrawlData crawlData, ICrawlData cachedCrawlData, ImporterDocument document)
initCrawlData in class AbstractCrawler

protected ImporterResponse executeImporterPipeline(ImporterPipelineContext importerContext)
executeImporterPipeline in class AbstractCrawler

protected BaseCrawlData createEmbeddedCrawlData(String embeddedReference, ICrawlData parentCrawlData)
createEmbeddedCrawlData in class AbstractCrawler

protected void executeCommitterPipeline(ICrawler crawler, ImporterDocument doc, ICrawlDataStore crawlDataStore, BaseCrawlData crawlData, BaseCrawlData cachedCrawlData)
executeCommitterPipeline in class AbstractCrawler

protected void beforeFinalizeDocumentProcessing(BaseCrawlData crawlData, ICrawlDataStore store, ImporterDocument doc, ICrawlData cachedData)
beforeFinalizeDocumentProcessing in class AbstractCrawler

protected void markReferenceVariationsAsProcessed(BaseCrawlData crawlData, ICrawlDataStore crawlDataStore)
markReferenceVariationsAsProcessed in class AbstractCrawler

protected void cleanupExecution(JobStatusUpdater statusUpdater, JobSuite suite, ICrawlDataStore refStore)
cleanupExecution in class AbstractCrawler

Copyright © 2009–2021 Norconex Inc. All rights reserved.