public class JDBCCrawlDataStore extends AbstractCrawlDataStore
Modifier and Type | Field and Description |
---|---|
static String |
TABLE_ACTIVE |
static String |
TABLE_CACHE |
static String |
TABLE_PROCESSED_INVALID |
static String |
TABLE_PROCESSED_VALID |
static String |
TABLE_QUEUE |
Constructor and Description |
---|
JDBCCrawlDataStore(String path,
boolean resume,
IJDBCSerializer serializer) |
Modifier and Type | Method and Description |
---|---|
void |
close()
Closes a database connection.
|
int |
getActiveCount()
Gets the number of active references (currently being processed).
|
ICrawlData |
getCached(String reference)
Gets the cached reference from the previous time the crawler was run.
|
Iterator<ICrawlData> |
getCacheIterator()
Gets the cache iterator.
|
ICrawlData |
getProcessed(String reference)
Gets an already processed reference from the current crawl session.
|
int |
getProcessedCount()
Gets the number of references processed.
|
int |
getQueueSize()
Gets the size of the reference queue (number of
references left to process).
|
boolean |
isActive(String reference)
Whether the given reference is currently being processed (i.e., is active).
|
boolean |
isCacheEmpty()
Whether there are any references in the cache from a previous crawler
run.
|
boolean |
isProcessed(String reference)
Whether the given reference has been processed.
|
boolean |
isQueued(String reference)
Whether the given reference is in the queue or not
(waiting to be processed).
|
boolean |
isQueueEmpty()
Whether there are any references to process in the queue.
|
ICrawlData |
nextQueued()
Returns the next reference to be processed from the queue and marks it as
being "active" (i.e., currently being processed).
|
void |
processed(ICrawlData crawlData)
Marks this reference as processed.
|
void |
queue(ICrawlData crawlData)
Queues a reference for future processing.
|
public static final String TABLE_QUEUE
public static final String TABLE_ACTIVE
public static final String TABLE_CACHE
public static final String TABLE_PROCESSED_VALID
public static final String TABLE_PROCESSED_INVALID
public JDBCCrawlDataStore(String path, boolean resume, IJDBCSerializer serializer)
public final void queue(ICrawlData crawlData)
ICrawlDataStore
Queues a reference for future processing.
crawlData - the reference to eventually be processed
public final void processed(ICrawlData crawlData)
ICrawlDataStore
crawlData - processed reference
public final boolean isQueueEmpty()
ICrawlDataStore
true if the queue is empty
public final int getQueueSize()
ICrawlDataStore
public final boolean isQueued(String reference)
ICrawlDataStore
reference - the reference
true if the reference is in the queue
public final ICrawlData nextQueued()
ICrawlDataStore
public final boolean isActive(String reference)
ICrawlDataStore
reference - the reference
true if active
public final int getActiveCount()
ICrawlDataStore
public ICrawlData getCached(String reference)
ICrawlDataStore
reference - reference cached from previous run
public final boolean isCacheEmpty()
ICrawlDataStore
true if the cache is empty
public final boolean isProcessed(String reference)
ICrawlDataStore
reference - the reference
true if processed
public final int getProcessedCount()
ICrawlDataStore
public ICrawlData getProcessed(String reference)
ICrawlDataStore
reference - reference to get
public Iterator<ICrawlData> getCacheIterator()
ICrawlDataStore
public void close()
ICrawlDataStore
Copyright © 2014–2021 Norconex Inc. All rights reserved.