|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
public class DerbyCrawlURLDatabase
Constructor Summary | |
---|---|
DerbyCrawlURLDatabase(HttpCrawlerConfig config,
boolean resume)
|
Method Summary | |
---|---|
int |
getActiveCount()
Gets the number of active URLs (currently being processed). |
CrawlURL |
getCached(String url)
Gets the cached URL from the previous time the crawler was run. |
int |
getProcessedCount()
Gets the number of URLs processed. |
int |
getQueueSize()
Gets the size of the URL queue (number of URLs left to process). |
boolean |
isActive(String url)
Whether the given URL is currently being processed. |
boolean |
isCacheEmpty()
Whether there are any URLs in the cache from a previous crawler run. |
boolean |
isProcessed(String url)
Whether the given URL has been processed. |
boolean |
isQueued(String url)
Whether the given URL is in the queue or not (waiting to be processed). |
boolean |
isQueueEmpty()
Whether there are any URLs to process in the queue. |
boolean |
isVanished(CrawlURL crawlURL)
Whether a url has been deleted. |
CrawlURL |
next()
Returns the next URL to be processed and marks it as being "active". |
void |
processed(CrawlURL crawlURL)
Marks this URL as processed. |
void |
queue(String url,
int depth)
Queues a URL for future processing. |
void |
queueCache()
Queues URLs cached from a previous run so they can be processed again. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public DerbyCrawlURLDatabase(HttpCrawlerConfig config, boolean resume)
Method Detail |
---|
public final void queue(String url, int depth)
ICrawlURLDatabase
queue
in interface ICrawlURLDatabase
url
- the URL to eventually be processed
depth
- how many clicks away from starting URL(s)
public final boolean isQueueEmpty()
ICrawlURLDatabase
isQueueEmpty
in interface ICrawlURLDatabase
true
if the queue is empty
public final int getQueueSize()
ICrawlURLDatabase
getQueueSize
in interface ICrawlURLDatabase
public final boolean isQueued(String url)
ICrawlURLDatabase
isQueued
in interface ICrawlURLDatabase
url
- url
true
if the URL is in the queue
public final CrawlURL next()
ICrawlURLDatabase
next
in interface ICrawlURLDatabase
public final boolean isActive(String url)
ICrawlURLDatabase
isActive
in interface ICrawlURLDatabase
url
- the url
true
if active
public final int getActiveCount()
ICrawlURLDatabase
getActiveCount
in interface ICrawlURLDatabase
public CrawlURL getCached(String url)
ICrawlURLDatabase
getCached
in interface ICrawlURLDatabase
url
- URL cached from previous run
public final boolean isCacheEmpty()
ICrawlURLDatabase
isCacheEmpty
in interface ICrawlURLDatabase
true
if the cache is empty
public final void processed(CrawlURL crawlURL)
ICrawlURLDatabase
processed
in interface ICrawlURLDatabase
public final boolean isProcessed(String url)
ICrawlURLDatabase
isProcessed
in interface ICrawlURLDatabase
url
- url
true
if processed
public final int getProcessedCount()
ICrawlURLDatabase
getProcessedCount
in interface ICrawlURLDatabase
public final void queueCache()
ICrawlURLDatabase
queueCache
in interface ICrawlURLDatabase
public final boolean isVanished(CrawlURL crawlURL)
ICrawlURLDatabase
isVanished
in interface ICrawlURLDatabase
crawlURL
- the URL
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |