com.norconex.collector.http.crawler
Class HttpCrawlerConfig

java.lang.Object
  extended by com.norconex.collector.http.crawler.HttpCrawlerConfig
All Implemented Interfaces:
Serializable, Cloneable

public class HttpCrawlerConfig
extends Object
implements Cloneable, Serializable

See Also:
Serialized Form

Constructor Summary
HttpCrawlerConfig()
           
 
Method Summary
protected  Object clone()
           
 ICommitter getCommitter()
           
 IHttpCrawlerEventListener[] getCrawlerListeners()
           
 ICrawlURLDatabaseFactory getCrawlURLDatabaseFactory()
           
 IDelayResolver getDelayResolver()
           
 IHttpClientInitializer getHttpClientInitializer()
           
 IHttpDocumentChecksummer getHttpDocumentChecksummer()
           
 IHttpDocumentFetcher getHttpDocumentFetcher()
           
 IHttpDocumentFilter[] getHttpDocumentfilters()
           
 IHttpHeadersChecksummer getHttpHeadersChecksummer()
           
 IHttpHeadersFetcher getHttpHeadersFetcher()
           
 IHttpHeadersFilter[] getHttpHeadersFilters()
           
 String getId()
           
 ImporterConfig getImporterConfig()
           
 int getMaxDepth()
           
 int getMaxURLs()
           
 int getNumThreads()
           
 IHttpDocumentProcessor[] getPostImportProcessors()
           
 IHttpDocumentProcessor[] getPreImportProcessors()
           
 IRobotsTxtProvider getRobotsTxtProvider()
           
 String[] getStartURLs()
           
 IURLExtractor getUrlExtractor()
           
 IURLFilter[] getURLFilters()
           
 IURLNormalizer getUrlNormalizer()
           
 File getWorkDir()
           
 boolean isDeleteOrphans()
           
 boolean isIgnoreRobotsTxt()
           
 boolean isKeepDownloads()
           
 void setCommitter(ICommitter committer)
           
 void setCrawlerListeners(IHttpCrawlerEventListener[] crawlerListeners)
           
 void setCrawlURLDatabaseFactory(ICrawlURLDatabaseFactory crawlURLDatabaseFactory)
           
 void setDelayResolver(IDelayResolver delayResolver)
           
 void setDeleteOrphans(boolean deleteOrphans)
           
 void setHttpClientInitializer(IHttpClientInitializer httpClientInitializer)
           
 void setHttpDocumentChecksummer(IHttpDocumentChecksummer httpDocumentChecksummer)
           
 void setHttpDocumentFetcher(IHttpDocumentFetcher httpDocumentFetcher)
           
 void setHttpDocumentfilters(IHttpDocumentFilter[] documentfilters)
           
 void setHttpHeadersChecksummer(IHttpHeadersChecksummer httpHeadersChecksummer)
           
 void setHttpHeadersFetcher(IHttpHeadersFetcher httpHeadersFetcher)
           
 void setHttpHeadersFilters(IHttpHeadersFilter[] httpHeadersFilters)
           
 void setId(String id)
           
 void setIgnoreRobotsTxt(boolean ignoreRobotsTxt)
           
 void setImporterConfig(ImporterConfig importerConfig)
           
 void setKeepDownloads(boolean keepDownloads)
           
 void setMaxDepth(int depth)
           
 void setMaxURLs(int maxURLs)
           
 void setNumThreads(int numThreads)
           
 void setPostImportProcessors(IHttpDocumentProcessor[] httpPostProcessors)
           
 void setPreImportProcessors(IHttpDocumentProcessor[] httpPreProcessors)
           
 void setRobotsTxtProvider(IRobotsTxtProvider robotsTxtProvider)
           
 void setStartURLs(String[] startURLs)
           
 void setUrlExtractor(IURLExtractor urlExtractor)
           
 void setURLFilters(IURLFilter[] urlFilters)
           
 void setUrlNormalizer(IURLNormalizer urlNormalizer)
           
 void setWorkDir(File workDir)
           
 
Methods inherited from class java.lang.Object
equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

HttpCrawlerConfig

public HttpCrawlerConfig()
Method Detail

getId

public String getId()

setId

public void setId(String id)

getStartURLs

public String[] getStartURLs()

setStartURLs

public void setStartURLs(String[] startURLs)

setMaxDepth

public void setMaxDepth(int depth)

getMaxDepth

public int getMaxDepth()

setWorkDir

public void setWorkDir(File workDir)

getWorkDir

public File getWorkDir()

getNumThreads

public int getNumThreads()

setNumThreads

public void setNumThreads(int numThreads)

getMaxURLs

public int getMaxURLs()

setMaxURLs

public void setMaxURLs(int maxURLs)

getHttpDocumentfilters

public IHttpDocumentFilter[] getHttpDocumentfilters()

setHttpDocumentfilters

public void setHttpDocumentfilters(IHttpDocumentFilter[] documentfilters)

getURLFilters

public IURLFilter[] getURLFilters()

setURLFilters

public void setURLFilters(IURLFilter[] urlFilters)

getImporterConfig

public ImporterConfig getImporterConfig()

setImporterConfig

public void setImporterConfig(ImporterConfig importerConfig)

getHttpClientInitializer

public IHttpClientInitializer getHttpClientInitializer()

setHttpClientInitializer

public void setHttpClientInitializer(IHttpClientInitializer httpClientInitializer)

getHttpDocumentFetcher

public IHttpDocumentFetcher getHttpDocumentFetcher()

setHttpDocumentFetcher

public void setHttpDocumentFetcher(IHttpDocumentFetcher httpDocumentFetcher)

getHttpHeadersFetcher

public IHttpHeadersFetcher getHttpHeadersFetcher()

setHttpHeadersFetcher

public void setHttpHeadersFetcher(IHttpHeadersFetcher httpHeadersFetcher)

getUrlExtractor

public IURLExtractor getUrlExtractor()

setUrlExtractor

public void setUrlExtractor(IURLExtractor urlExtractor)

getRobotsTxtProvider

public IRobotsTxtProvider getRobotsTxtProvider()

setRobotsTxtProvider

public void setRobotsTxtProvider(IRobotsTxtProvider robotsTxtProvider)

getUrlNormalizer

public IURLNormalizer getUrlNormalizer()

setUrlNormalizer

public void setUrlNormalizer(IURLNormalizer urlNormalizer)

isDeleteOrphans

public boolean isDeleteOrphans()

setDeleteOrphans

public void setDeleteOrphans(boolean deleteOrphans)

getDelayResolver

public IDelayResolver getDelayResolver()

setDelayResolver

public void setDelayResolver(IDelayResolver delayResolver)

getCrawlerListeners

public IHttpCrawlerEventListener[] getCrawlerListeners()

setCrawlerListeners

public void setCrawlerListeners(IHttpCrawlerEventListener[] crawlerListeners)

getHttpHeadersFilters

public IHttpHeadersFilter[] getHttpHeadersFilters()

setHttpHeadersFilters

public void setHttpHeadersFilters(IHttpHeadersFilter[] httpHeadersFilters)

getPreImportProcessors

public IHttpDocumentProcessor[] getPreImportProcessors()

setPreImportProcessors

public void setPreImportProcessors(IHttpDocumentProcessor[] httpPreProcessors)

getPostImportProcessors

public IHttpDocumentProcessor[] getPostImportProcessors()

setPostImportProcessors

public void setPostImportProcessors(IHttpDocumentProcessor[] httpPostProcessors)

isIgnoreRobotsTxt

public boolean isIgnoreRobotsTxt()

setIgnoreRobotsTxt

public void setIgnoreRobotsTxt(boolean ignoreRobotsTxt)

getCommitter

public ICommitter getCommitter()

setCommitter

public void setCommitter(ICommitter committer)

isKeepDownloads

public boolean isKeepDownloads()

setKeepDownloads

public void setKeepDownloads(boolean keepDownloads)

getHttpHeadersChecksummer

public IHttpHeadersChecksummer getHttpHeadersChecksummer()

setHttpHeadersChecksummer

public void setHttpHeadersChecksummer(IHttpHeadersChecksummer httpHeadersChecksummer)

getHttpDocumentChecksummer

public IHttpDocumentChecksummer getHttpDocumentChecksummer()

setHttpDocumentChecksummer

public void setHttpDocumentChecksummer(IHttpDocumentChecksummer httpDocumentChecksummer)

getCrawlURLDatabaseFactory

public ICrawlURLDatabaseFactory getCrawlURLDatabaseFactory()

setCrawlURLDatabaseFactory

public void setCrawlURLDatabaseFactory(ICrawlURLDatabaseFactory crawlURLDatabaseFactory)

clone

protected Object clone()
                throws CloneNotSupportedException
Overrides:
clone in class Object
Throws:
CloneNotSupportedException


Copyright © 2009-2013 Norconex Inc. All Rights Reserved.