A B C D E F G H I J K L M N O P R S T U V W X Y Z _ 

A

abort(String, String) - Method in class org.apache.nutch.api.impl.RAMJobManager
 
abort(String, String) - Method in interface org.apache.nutch.api.JobManager
 
abort(String, String) - Method in class org.apache.nutch.api.resources.JobResource
 
AbstractBasePage<T> - Class in org.apache.nutch.webui.pages
 
AbstractBasePage() - Constructor for class org.apache.nutch.webui.pages.AbstractBasePage
 
AbstractFetchSchedule - Class in org.apache.nutch.crawl
This class provides common methods for implementations of FetchSchedule.
AbstractFetchSchedule() - Constructor for class org.apache.nutch.crawl.AbstractFetchSchedule
 
AbstractFetchSchedule(Configuration) - Constructor for class org.apache.nutch.crawl.AbstractFetchSchedule
 
AbstractResource - Class in org.apache.nutch.api.resources
 
AbstractResource() - Constructor for class org.apache.nutch.api.resources.AbstractResource
 
AbstractTestbedHandler - Class in org.apache.nutch.tools.proxy
 
AbstractTestbedHandler() - Constructor for class org.apache.nutch.tools.proxy.AbstractTestbedHandler
 
accept - Variable in class org.apache.nutch.protocol.http.api.HttpBase
The "Accept" request header value.
accept() - Method in class org.apache.nutch.urlfilter.api.RegexRule
Return if this rule is used for filtering-in or out.
acceptLanguage - Variable in class org.apache.nutch.protocol.http.api.HttpBase
The "Accept-Language" request header value.
ACCESS_DENIED - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Access denied - authorization required, but missing/incorrect.
AdaptiveFetchSchedule - Class in org.apache.nutch.crawl
This class implements an adaptive re-fetch algorithm.
AdaptiveFetchSchedule() - Constructor for class org.apache.nutch.crawl.AdaptiveFetchSchedule
 
add(String, String) - Method in class org.apache.nutch.indexer.NutchDocument
 
add(String, String) - Method in class org.apache.nutch.metadata.Metadata
Add a metadata name/value mapping.
add(String, String) - Method in class org.apache.nutch.metadata.SpellCheckedMetadata
 
add(byte[], byte[]) - Static method in class org.apache.nutch.util.Bytes
 
add(byte[], byte[], byte[]) - Static method in class org.apache.nutch.util.Bytes
 
add(E) - Method in class org.apache.nutch.util.Histogram
 
add(E, float) - Method in class org.apache.nutch.util.Histogram
 
add(Histogram<E>) - Method in class org.apache.nutch.util.Histogram
 
addAttribute(String, String) - Method in class org.apache.nutch.plugin.Extension
Adds an attribute and is only used until model creation at plugin system start up.
addClue(String, String, int) - Method in class org.apache.nutch.util.EncodingDetector
 
addClue(String, String) - Method in class org.apache.nutch.util.EncodingDetector
 
addDependency(String) - Method in class org.apache.nutch.plugin.PluginDescriptor
Adds a dependency
addExportedLibRelative(String) - Method in class org.apache.nutch.plugin.PluginDescriptor
Adds an exported library with a relative path to the plugin directory.
addExtension(Extension) - Method in class org.apache.nutch.plugin.ExtensionPoint
Install a corresponding extension to this extension point.
addExtension(Extension) - Method in class org.apache.nutch.plugin.PluginDescriptor
Adds an extension.
addExtensionPoint(ExtensionPoint) - Method in class org.apache.nutch.plugin.PluginDescriptor
Adds an extension point.
addIndexBackendOptions(Configuration) - Method in class org.apache.nutch.analysis.lang.LanguageIndexingFilter
 
addIndexBackendOptions(Configuration) - Method in class org.apache.nutch.indexer.anchor.AnchorIndexingFilter
 
addIndexBackendOptions(Configuration) - Method in class org.apache.nutch.indexer.basic.BasicIndexingFilter
 
addIndexBackendOptions(Configuration) - Method in class org.apache.nutch.indexer.html.HtmlIndexingFilter
 
addIndexBackendOptions(Configuration) - Method in class org.apache.nutch.indexer.more.MoreIndexingFilter
 
addInstancesMenuMenu() - Method in class org.apache.nutch.webui.pages.AbstractBasePage
 
addMeta(String, String) - Method in class org.apache.nutch.metadata.MetaWrapper
Add metadata.
addMyHeader(HttpServletResponse, String, String) - Method in class org.apache.nutch.tools.proxy.AbstractTestbedHandler
 
addNotExportedLibRelative(String) - Method in class org.apache.nutch.plugin.PluginDescriptor
Adds a non-exported library with a path relative to the plugin directory.
addPatternBackward(String) - Method in class org.apache.nutch.util.TrieStringMatcher
Adds any necessary nodes to the trie so that the given String can be decoded in reverse and the first character is represented by a terminal node.
addPatternForward(String) - Method in class org.apache.nutch.util.TrieStringMatcher
Adds any necessary nodes to the trie so that the given String can be decoded and the last character is represented by a terminal node.
addTiming(String, String, long) - Method in class org.apache.nutch.tools.Benchmark.BenchmarkResults
 
addUrlFeatures(NutchDocument, String) - Method in class org.creativecommons.nutch.CCIndexingFilter
Add the features represented by a license URL.
addUserMenu() - Method in class org.apache.nutch.webui.pages.AbstractBasePage
 
addValue(Map<String, Object>) - Method in class org.apache.nutch.api.model.response.DbQueryResult
 
AdminResource - Class in org.apache.nutch.api.resources
 
AdminResource() - Constructor for class org.apache.nutch.api.resources.AdminResource
 
afterExecute(Runnable, Throwable) - Method in class org.apache.nutch.api.impl.NutchServerPoolExecutor
 
agentNames - Variable in class org.apache.nutch.protocol.RobotRulesParser
 
ALL_BATCH_ID_STR - Static variable in interface org.apache.nutch.metadata.Nutch
 
ALL_CRAWL_ID - Static variable in interface org.apache.nutch.metadata.Nutch
 
allowForbidden - Variable in class org.apache.nutch.protocol.http.api.HttpRobotRulesParser
 
AnchorIndexingFilter - Class in org.apache.nutch.indexer.anchor
Indexing filter that offers an option to either index all inbound anchor text for a document or deduplicate anchors.
AnchorIndexingFilter() - Constructor for class org.apache.nutch.indexer.anchor.AnchorIndexingFilter
 
append(Node) - Method in class org.apache.nutch.parse.html.DOMBuilder
Append a node to the current container.
ArcInputFormat - Class in org.apache.nutch.tools.arc
An input format that reads arc files.
ArcInputFormat() - Constructor for class org.apache.nutch.tools.arc.ArcInputFormat
 
ArcRecordReader - Class in org.apache.nutch.tools.arc
The ArcRecordReader class provides a record reader which reads records from arc files.
ArcRecordReader(Configuration, FileSplit) - Constructor for class org.apache.nutch.tools.arc.ArcRecordReader
Constructor that sets the configuration and file split.
ARG_BATCH - Static variable in interface org.apache.nutch.metadata.Nutch
Batch id to select.
ARG_CLASS - Static variable in interface org.apache.nutch.metadata.Nutch
Class to run as a NutchTool.
ARG_COMMIT - Static variable in class org.apache.nutch.indexer.CleaningJob
 
ARG_CRAWL - Static variable in interface org.apache.nutch.metadata.Nutch
Crawl id to use.
ARG_CURTIME - Static variable in interface org.apache.nutch.metadata.Nutch
The notion of current time.
ARG_DEPTH - Static variable in interface org.apache.nutch.metadata.Nutch
Depth (number of cycles) of a crawl.
ARG_FILTER - Static variable in interface org.apache.nutch.metadata.Nutch
Apply URLFilters.
ARG_FORCE - Static variable in interface org.apache.nutch.metadata.Nutch
Force processing even if there are locks or inconsistencies.
ARG_NORMALIZE - Static variable in interface org.apache.nutch.metadata.Nutch
Apply URLNormalizers.
ARG_NUMTASKS - Static variable in interface org.apache.nutch.metadata.Nutch
Number of fetcher tasks.
ARG_RESUME - Static variable in interface org.apache.nutch.metadata.Nutch
Resume previously aborted op.
ARG_SEEDDIR - Static variable in interface org.apache.nutch.metadata.Nutch
A path to a directory containing a list of seed URLs.
ARG_SEEDLIST - Static variable in interface org.apache.nutch.metadata.Nutch
Whitespace-separated list of seed URLs.
ARG_SOLR - Static variable in interface org.apache.nutch.metadata.Nutch
Solr URL.
ARG_SORT - Static variable in interface org.apache.nutch.metadata.Nutch
Sort statistics.
ARG_THREADS - Static variable in interface org.apache.nutch.metadata.Nutch
Number of fetcher threads (per map task).
ARG_TOPN - Static variable in interface org.apache.nutch.metadata.Nutch
Generate topN scoring URLs.
attrName - Variable in class org.apache.nutch.parse.html.DOMContentUtils.LinkParams
 
autoDetectClues(WebPage, boolean) - Method in class org.apache.nutch.util.EncodingDetector
 
AutomatonURLFilter - Class in org.apache.nutch.urlfilter.automaton
RegexURLFilterBase implementation based on the dk.brics.automaton Finite-State Automata for Java™.
AutomatonURLFilter() - Constructor for class org.apache.nutch.urlfilter.automaton.AutomatonURLFilter
 
AutomatonURLFilter(String) - Constructor for class org.apache.nutch.urlfilter.automaton.AutomatonURLFilter
 
autoResolveContentType(String, String, byte[]) - Method in class org.apache.nutch.util.MimeUtil
A facade interface to trying all the possible mime type resolution strategies available within Tika.

B

BasicIndexingFilter - Class in org.apache.nutch.indexer.basic
Adds basic searchable fields to a document.
BasicIndexingFilter() - Constructor for class org.apache.nutch.indexer.basic.BasicIndexingFilter
 
BasicURLNormalizer - Class in org.apache.nutch.net.urlnormalizer.basic
Converts URLs to a normal form: remove dot segments in path: /./ or /../ remove default ports, e.g.
BasicURLNormalizer() - Constructor for class org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer
 
BATCH_ID - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
BATCH_NAME_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
batchId - Variable in class org.apache.nutch.indexer.IndexingJob.IndexerMapper
 
beforeExecute(Thread, Runnable) - Method in class org.apache.nutch.api.impl.NutchServerPoolExecutor
 
Benchmark - Class in org.apache.nutch.tools
 
Benchmark() - Constructor for class org.apache.nutch.tools.Benchmark
 
benchmark(int, int, int, int, long, String) - Method in class org.apache.nutch.tools.Benchmark
 
Benchmark.BenchmarkResults - Class in org.apache.nutch.tools
 
Benchmark.BenchmarkResults() - Constructor for class org.apache.nutch.tools.Benchmark.BenchmarkResults
 
binarySearch(byte[][], byte[], int, int, RawComparator<byte[]>) - Static method in class org.apache.nutch.util.Bytes
Binary search for keys in indexes.
BLOCKED - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Thread was blocked http.max.delays times during fetching.
BlockedException - Exception in org.apache.nutch.protocol.http.api
 
BlockedException(String) - Constructor for exception org.apache.nutch.protocol.http.api.BlockedException
 
BOOST_FIELD - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
BOOST_FIELD - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
BUFFER_SIZE - Static variable in class org.apache.nutch.protocol.http.api.HttpBase
 
build() - Method in class org.apache.nutch.storage.Host.Builder
 
build() - Method in class org.apache.nutch.storage.ParseStatus.Builder
 
build() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
 
build() - Method in class org.apache.nutch.storage.WebPage.Builder
 
build() - Method in class org.apache.nutch.webui.client.impl.RemoteCommandBuilder
 
build() - Method in class org.apache.nutch.webui.pages.components.ColorEnumLabelBuilder
 
Bytes - Class in org.apache.nutch.util
Utility class that handles byte arrays, conversions to/from other types, comparisons, hash code generation, manufacturing keys for HashMaps or HashSets, etc.
Bytes() - Constructor for class org.apache.nutch.util.Bytes
 
Bytes.ByteArrayComparator - Class in org.apache.nutch.util
Byte array comparator class.
Bytes.ByteArrayComparator() - Constructor for class org.apache.nutch.util.Bytes.ByteArrayComparator
Constructor
BYTES_COMPARATOR - Static variable in class org.apache.nutch.util.Bytes
Pass this to TreeMaps where byte [] are keys.
BYTES_RAWCOMPARATOR - Static variable in class org.apache.nutch.util.Bytes
Use for comparing byte arrays, byte-by-byte.
bytesToVint(byte[]) - Static method in class org.apache.nutch.util.Bytes
 

C

CACHE - Static variable in class org.apache.nutch.protocol.RobotRulesParser
 
CACHING_FORBIDDEN_ALL - Static variable in interface org.apache.nutch.metadata.Nutch
Don't show either original forbidden content or summaries.
CACHING_FORBIDDEN_CONTENT - Static variable in interface org.apache.nutch.metadata.Nutch
Don't show original forbidden content, but show summaries.
CACHING_FORBIDDEN_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
Sites may request that search engines don't provide access to cached documents.
CACHING_FORBIDDEN_KEY_UTF8 - Static variable in interface org.apache.nutch.metadata.Nutch
 
CACHING_FORBIDDEN_NONE - Static variable in interface org.apache.nutch.metadata.Nutch
Show both original forbidden content and summaries (default).
calculate(WebPage) - Method in class org.apache.nutch.crawl.MD5Signature
 
calculate(WebPage) - Method in class org.apache.nutch.crawl.Signature
 
calculate(WebPage) - Method in class org.apache.nutch.crawl.TextMD5Signature
 
calculate(WebPage) - Method in class org.apache.nutch.crawl.TextProfileSignature
 
calculateLastFetchTime(WebPage) - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
This method returns the last fetch time of the CrawlDatum.
calculateLastFetchTime(WebPage) - Method in interface org.apache.nutch.crawl.FetchSchedule
Calculates last fetch time of the given CrawlDatum.
call() - Method in class org.apache.nutch.webui.client.impl.RemoteCommandExecutor.JobStateChecker
 
canStop(boolean) - Method in class org.apache.nutch.api.NutchServer
Safety and convenience method to determine whether or not it is safe to shut down the server.
CCIndexingFilter - Class in org.creativecommons.nutch
Adds basic searchable fields to a document.
CCIndexingFilter() - Constructor for class org.creativecommons.nutch.CCIndexingFilter
 
CCParseFilter - Class in org.creativecommons.nutch
Adds metadata identifying the Creative Commons license used, if any.
CCParseFilter() - Constructor for class org.creativecommons.nutch.CCParseFilter
 
CCParseFilter.Walker - Class in org.creativecommons.nutch
Walks DOM tree, looking for RDF in comments and licenses in anchors.
cdata(char[], int, int) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of cdata.
CHAR_ENCODING_FOR_CONVERSION - Static variable in interface org.apache.nutch.metadata.Nutch
 
characters(char[], int, int) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of character data.
charactersRaw(char[], int, int) - Method in class org.apache.nutch.parse.html.DOMBuilder
If available, when the disable-output-escaping attribute is used, output raw text without escaping.
CHECK_BLOCKING - Static variable in interface org.apache.nutch.protocol.Protocol
Property name.
CHECK_ROBOTS - Static variable in interface org.apache.nutch.protocol.Protocol
Property name.
checkClientTrusted(X509Certificate[], String) - Method in class org.apache.nutch.protocol.httpclient.DummyX509TrustManager
 
checkMark(WebPage) - Method in enum org.apache.nutch.storage.Mark
 
checkOutputSpecs(JobContext) - Method in class org.apache.nutch.indexer.IndexerOutputFormat
 
checkServerTrusted(X509Certificate[], String) - Method in class org.apache.nutch.protocol.httpclient.DummyX509TrustManager
 
childLen - Variable in class org.apache.nutch.parse.html.DOMContentUtils.LinkParams
 
children - Variable in class org.apache.nutch.util.TrieStringMatcher.TrieNode
 
childrenList - Variable in class org.apache.nutch.util.TrieStringMatcher.TrieNode
 
chooseRepr(String, String, boolean) - Static method in class org.apache.nutch.util.URLUtil
Given two urls, a src and a destination of a redirect, it returns the representative url.
CircularDependencyException - Exception in org.apache.nutch.plugin
CircularDependencyException will be thrown if a circular dependency is detected.
CircularDependencyException(Throwable) - Constructor for exception org.apache.nutch.plugin.CircularDependencyException
 
CircularDependencyException(String) - Constructor for exception org.apache.nutch.plugin.CircularDependencyException
 
cleanField(String) - Static method in class org.apache.nutch.util.StringUtil
Takes in a String value and cleans out any offending "�" (U+FFFD replacement) characters.
CleaningJob - Class in org.apache.nutch.indexer
 
CleaningJob() - Constructor for class org.apache.nutch.indexer.CleaningJob
 
CleaningJob.CleanMapper - Class in org.apache.nutch.indexer
 
CleaningJob.CleanMapper() - Constructor for class org.apache.nutch.indexer.CleaningJob.CleanMapper
 
CleaningJob.CleanReducer - Class in org.apache.nutch.indexer
 
CleaningJob.CleanReducer() - Constructor for class org.apache.nutch.indexer.CleaningJob.CleanReducer
 
cleanMimeType(String) - Static method in class org.apache.nutch.util.MimeUtil
Cleans a MimeType name by removing out the actual MimeType, from a string of the form:
cleanup(Reducer<UrlWithScore, NutchWritable, String, WebPage>.Context) - Method in class org.apache.nutch.crawl.DbUpdateReducer
 
cleanup(Reducer<Text, LongWritable, Text, LongWritable>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableStatCombiner
 
cleanup(Reducer<Text, LongWritable, Text, LongWritable>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableStatReducer
 
cleanup(Reducer<String, WebPage, NullWritable, NullWritable>.Context) - Method in class org.apache.nutch.indexer.CleaningJob.CleanReducer
 
cleanup(Mapper<String, WebPage, String, NutchDocument>.Context) - Method in class org.apache.nutch.indexer.IndexingJob.IndexerMapper
 
cleanup(Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>.Context) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
clear() - Method in class org.apache.nutch.metadata.Metadata
Remove all mappings from metadata.
clearArgs() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Clears the value of the 'args' field
clearArgs() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Clears the value of the 'args' field
clearBaseUrl() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'baseUrl' field
clearBatchId() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'batchId' field
clearClues() - Method in class org.apache.nutch.util.EncodingDetector
Clears all clues.
clearCode() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Clears the value of the 'code' field
clearContent() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'content' field
clearContentType() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'contentType' field
clearFetchInterval() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'fetchInterval' field
clearFetchTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'fetchTime' field
clearHeaders() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'headers' field
clearInlinks() - Method in class org.apache.nutch.storage.Host.Builder
Clears the value of the 'inlinks' field
clearInlinks() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'inlinks' field
clearLastModified() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Clears the value of the 'lastModified' field
clearMajorCode() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Clears the value of the 'majorCode' field
clearMarkers() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'markers' field
clearMetadata() - Method in class org.apache.nutch.storage.Host.Builder
Clears the value of the 'metadata' field
clearMetadata() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'metadata' field
clearMinorCode() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Clears the value of the 'minorCode' field
clearModifiedTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'modifiedTime' field
clearOutlinks() - Method in class org.apache.nutch.storage.Host.Builder
Clears the value of the 'outlinks' field
clearOutlinks() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'outlinks' field
clearParseStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'parseStatus' field
clearPrevFetchTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'prevFetchTime' field
clearPrevModifiedTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'prevModifiedTime' field
clearPrevSignature() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'prevSignature' field
clearProtocolStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'protocolStatus' field
clearReprUrl() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'reprUrl' field
clearRetriesSinceFetch() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'retriesSinceFetch' field
clearScore() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'score' field
clearSignature() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'signature' field
clearStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'status' field
clearText() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'text' field
clearTitle() - Method in class org.apache.nutch.storage.WebPage.Builder
Clears the value of the 'title' field
Client - Class in org.apache.nutch.protocol.ftp
Client.java encapsulates functionalities necessary for nutch to get dir list and retrieve file from an FTP server.
Client() - Constructor for class org.apache.nutch.protocol.ftp.Client
 
close() - Method in class org.apache.nutch.crawl.WebTableReader.WebTableStatMapper
 
close() - Method in class org.apache.nutch.host.HostDb
 
close() - Method in interface org.apache.nutch.indexer.IndexWriter
 
close() - Method in class org.apache.nutch.indexer.IndexWriters
 
close() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecordReader
 
close() - Method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
close() - Method in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
close() - Method in class org.apache.nutch.tools.arc.ArcRecordReader
Closes the record reader resources.
close() - Method in class org.apache.nutch.util.domain.DomainStatistics.DomainStatisticsMapper
 
closeReaders(SequenceFile.Reader[]) - Static method in class org.apache.nutch.util.FSUtils
Closes a group of SequenceFile readers.
closeReaders(MapFile.Reader[]) - Static method in class org.apache.nutch.util.FSUtils
Closes a group of MapFile readers.
CLUSTER - Static variable in interface org.apache.nutch.indexwriter.elastic.ElasticConstants
 
CollectionManager - Class in org.apache.nutch.collection
 
CollectionManager(Configuration) - Constructor for class org.apache.nutch.collection.CollectionManager
 
CollectionManager() - Constructor for class org.apache.nutch.collection.CollectionManager
Used for testing
ColorEnumLabel<E extends Enum<E>> - Class in org.apache.nutch.webui.pages.components
Label which renders connection status as bootstrap label
ColorEnumLabelBuilder<E extends Enum<E>> - Class in org.apache.nutch.webui.pages.components
 
ColorEnumLabelBuilder(String) - Constructor for class org.apache.nutch.webui.pages.components.ColorEnumLabelBuilder
 
commandExecuted(Crawl, RemoteCommand, int) - Method in interface org.apache.nutch.webui.client.impl.CrawlingCycleListener
 
commandExecuted(Crawl, RemoteCommand, int) - Method in class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
CommandRunner - Class in org.apache.nutch.util
 
CommandRunner() - Constructor for class org.apache.nutch.util.CommandRunner
 
comment(char[], int, int) - Method in class org.apache.nutch.parse.html.DOMBuilder
Report an XML comment anywhere in the document.
commit() - Method in interface org.apache.nutch.indexer.IndexWriter
 
commit() - Method in class org.apache.nutch.indexer.IndexWriters
 
commit() - Method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
commit() - Method in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
COMMIT_INDEX - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
COMMIT_INDEX - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
COMMIT_SIZE - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
COMMIT_SIZE - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
compare(byte[], byte[]) - Static method in class org.apache.nutch.crawl.SignatureComparator
 
compare(ByteBuffer, ByteBuffer) - Static method in class org.apache.nutch.crawl.SignatureComparator
 
compare(UrlWithScore, UrlWithScore) - Method in class org.apache.nutch.crawl.UrlWithScore.UrlScoreComparator
 
compare(byte[], int, int, byte[], int, int) - Method in class org.apache.nutch.crawl.UrlWithScore.UrlScoreComparator
 
compare(UrlWithScore, UrlWithScore) - Method in class org.apache.nutch.crawl.UrlWithScore.UrlScoreComparator.UrlOnlyComparator
 
compare(byte[], int, int, byte[], int, int) - Method in class org.apache.nutch.crawl.UrlWithScore.UrlScoreComparator.UrlOnlyComparator
 
compare(byte[], byte[]) - Method in class org.apache.nutch.util.Bytes.ByteArrayComparator
 
compare(byte[], int, int, byte[], int, int) - Method in class org.apache.nutch.util.Bytes.ByteArrayComparator
 
compareTo(GeneratorJob.SelectorEntry) - Method in class org.apache.nutch.crawl.GeneratorJob.SelectorEntry
 
compareTo(UrlWithScore) - Method in class org.apache.nutch.crawl.UrlWithScore
 
compareTo(byte[], byte[]) - Static method in class org.apache.nutch.util.Bytes
 
compareTo(byte[], int, int, byte[], int, int) - Static method in class org.apache.nutch.util.Bytes
Lexicographically compare two arrays.
compareTo(TrieStringMatcher.TrieNode) - Method in class org.apache.nutch.util.TrieStringMatcher.TrieNode
 
conf - Variable in class org.apache.nutch.plugin.Plugin
 
conf - Variable in class org.apache.nutch.tools.arc.ArcRecordReader
 
configManager - Variable in class org.apache.nutch.api.resources.AbstractResource
 
ConfigResource - Class in org.apache.nutch.api.resources
 
ConfigResource() - Constructor for class org.apache.nutch.api.resources.ConfigResource
 
ConfManager - Interface in org.apache.nutch.api
 
ConnectionStatus - Enum in org.apache.nutch.webui.client.model
 
contains(String) - Method in class org.apache.nutch.storage.Host
 
Content - Class in org.apache.nutch.protocol
 
Content() - Constructor for class org.apache.nutch.protocol.Content
 
Content(String, String, byte[], String, Metadata, Configuration) - Constructor for class org.apache.nutch.protocol.Content
 
Content(String, String, byte[], String, Metadata, MimeUtil) - Constructor for class org.apache.nutch.protocol.Content
 
CONTENT_DISPOSITION - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
CONTENT_ENCODING - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
CONTENT_LANGUAGE - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
CONTENT_LENGTH - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
CONTENT_LOCATION - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
CONTENT_MD5 - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
CONTENT_TYPE - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
CONTENT_TYPE_UTF8 - Static variable in class org.apache.nutch.util.EncodingDetector
 
CONTRIBUTOR - Static variable in interface org.apache.nutch.metadata.DublinCore
An entity responsible for making contributions to the content of the resource.
convertPage(WebPage, Set<String>) - Static method in class org.apache.nutch.api.impl.db.DbPageConverter
 
count - Static variable in class org.apache.nutch.crawl.GeneratorReducer
 
COVERAGE - Static variable in interface org.apache.nutch.metadata.DublinCore
The extent or scope of the content of the resource.
CpmIteratorAdapter<T> - Class in org.apache.nutch.webui.pages.components
This is an iterator adapter, which wraps iterable items with CompoundPropertyModel.
CpmIteratorAdapter(Iterable<T>) - Constructor for class org.apache.nutch.webui.pages.components.CpmIteratorAdapter
 
Crawl - Class in org.apache.nutch.webui.client.model
 
Crawl() - Constructor for class org.apache.nutch.webui.client.model.Crawl
 
Crawl.CrawlStatus - Enum in org.apache.nutch.webui.client.model
 
CRAWL_ID_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
CRAWLDB_ADDITIONS_ALLOWED - Static variable in class org.apache.nutch.crawl.DbUpdateReducer
 
CrawlingCycle - Class in org.apache.nutch.webui.client.impl
This class implements the crawl cycle as in the crawl script.
CrawlingCycle(CrawlingCycleListener, RemoteCommandExecutor, Crawl, List<RemoteCommand>) - Constructor for class org.apache.nutch.webui.client.impl.CrawlingCycle
 
CrawlingCycleListener - Interface in org.apache.nutch.webui.client.impl
 
crawlingFinished(Crawl) - Method in interface org.apache.nutch.webui.client.impl.CrawlingCycleListener
 
crawlingFinished(Crawl) - Method in class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
crawlingStarted(Crawl) - Method in interface org.apache.nutch.webui.client.impl.CrawlingCycleListener
 
crawlingStarted(Crawl) - Method in class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
CrawlPanel - Class in org.apache.nutch.webui.pages.crawls
 
CrawlPanel(String) - Constructor for class org.apache.nutch.webui.pages.crawls.CrawlPanel
 
CrawlService - Interface in org.apache.nutch.webui.service
 
CrawlServiceImpl - Class in org.apache.nutch.webui.service.impl
 
CrawlServiceImpl() - Constructor for class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
CrawlsPage - Class in org.apache.nutch.webui.pages.crawls
This page is for crawl management.
CrawlsPage() - Constructor for class org.apache.nutch.webui.pages.crawls.CrawlsPage
 
CrawlStatus - Class in org.apache.nutch.crawl
 
CrawlStatus() - Constructor for class org.apache.nutch.crawl.CrawlStatus
 
create(NutchConfig) - Method in interface org.apache.nutch.api.ConfManager
 
create(NutchConfig) - Method in class org.apache.nutch.api.impl.RAMConfManager
 
create(JobConfig) - Method in class org.apache.nutch.api.impl.RAMJobManager
 
create(JobConfig) - Method in interface org.apache.nutch.api.JobManager
 
create(JobConfig) - Method in class org.apache.nutch.api.resources.JobResource
 
create() - Static method in class org.apache.nutch.util.NutchConfiguration
Create a Configuration for Nutch.
create(boolean, Properties) - Static method in class org.apache.nutch.util.NutchConfiguration
Create a Configuration from supplied properties.
createClient() - Method in class org.apache.nutch.webui.client.impl.NutchClientImpl
 
createCommands(Crawl) - Method in class org.apache.nutch.webui.client.impl.RemoteCommandsBatchFactory
 
createConfig(NutchConfig) - Method in class org.apache.nutch.api.resources.ConfigResource
 
createCrawlDao() - Method in class org.apache.nutch.webui.config.SpringConfiguration
 
createDao(Class<T>) - Method in class org.apache.nutch.webui.config.CustomDaoFactory
 
createKey() - Method in class org.apache.nutch.tools.arc.ArcRecordReader
Creates a new instance of the Text object for the key.
createLockFile(FileSystem, Path, boolean) - Static method in class org.apache.nutch.util.LockUtil
Create a lock file.
createNutchDao() - Method in class org.apache.nutch.webui.config.SpringConfiguration
 
createRecordReader(InputSplit, TaskAttemptContext) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputFormat
 
createRule(boolean, String) - Method in class org.apache.nutch.urlfilter.api.RegexURLFilterBase
Creates a new RegexRule.
createRule(boolean, String) - Method in class org.apache.nutch.urlfilter.automaton.AutomatonURLFilter
 
createRule(boolean, String) - Method in class org.apache.nutch.urlfilter.regex.RegexURLFilter
 
createSeed(SeedList) - Method in class org.apache.nutch.webui.client.impl.NutchClientImpl
 
createSeed(SeedList) - Method in interface org.apache.nutch.webui.client.NutchClient
Create seed list and return seed directory location
createSeedFile(SeedList) - Method in class org.apache.nutch.api.resources.SeedResource
 
createSeedListDao() - Method in class org.apache.nutch.webui.config.SpringConfiguration
 
createSeedUrlDao() - Method in class org.apache.nutch.webui.config.SpringConfiguration
 
createSocket(String, int, InetAddress, int) - Method in class org.apache.nutch.protocol.httpclient.DummySSLProtocolSocketFactory
 
createSocket(String, int, InetAddress, int, HttpConnectionParams) - Method in class org.apache.nutch.protocol.httpclient.DummySSLProtocolSocketFactory
Attempts to get a new socket connection to the given host within the given time limit.
createSocket(String, int) - Method in class org.apache.nutch.protocol.httpclient.DummySSLProtocolSocketFactory
 
createSocket(Socket, String, int, boolean) - Method in class org.apache.nutch.protocol.httpclient.DummySSLProtocolSocketFactory
 
createSubCollection(String, String) - Method in class org.apache.nutch.collection.CollectionManager
Create a new subcollection.
createTableCreator() - Method in class org.apache.nutch.webui.config.SpringConfiguration
 
createToolByClassName(String, Configuration) - Method in class org.apache.nutch.api.impl.JobFactory
 
createToolByType(JobManager.JobType, Configuration) - Method in class org.apache.nutch.api.impl.JobFactory
 
createValue() - Method in class org.apache.nutch.tools.arc.ArcRecordReader
Creates a new instance of the BytesWritable object for the value.
createWebStore(Configuration, Class<K>, Class<V>) - Static method in class org.apache.nutch.storage.StorageUtils
Creates a store for the given persistentClass.
CreativeCommons - Interface in org.apache.nutch.metadata
A collection of Creative Commons properties names.
CREATOR - Static variable in interface org.apache.nutch.metadata.DublinCore
An entity primarily responsible for making the content of the resource.
currentInstance - Variable in class org.apache.nutch.webui.pages.AbstractBasePage
 
currentJob - Variable in class org.apache.nutch.util.NutchTool
 
currentJobNum - Variable in class org.apache.nutch.util.NutchTool
 
CustomDaoFactory - Class in org.apache.nutch.webui.config
 
CustomDaoFactory(ConnectionSource) - Constructor for class org.apache.nutch.webui.config.CustomDaoFactory
 
CustomTableCreator - Class in org.apache.nutch.webui.config
 
CustomTableCreator(ConnectionSource, List<Dao<?, ?>>) - Constructor for class org.apache.nutch.webui.config.CustomTableCreator
 

D

DashboardPage - Class in org.apache.nutch.webui.pages
 
DashboardPage() - Constructor for class org.apache.nutch.webui.pages.DashboardPage
 
datastore - Variable in class org.apache.nutch.crawl.DbUpdateReducer
 
DATE - Static variable in interface org.apache.nutch.metadata.DublinCore
A date associated with an event in the life cycle of the resource.
DbFilter - Class in org.apache.nutch.api.model.request
 
DbFilter() - Constructor for class org.apache.nutch.api.model.request.DbFilter
 
DbIterator - Class in org.apache.nutch.api.impl.db
 
DbPageConverter - Class in org.apache.nutch.api.impl.db
 
DbPageConverter() - Constructor for class org.apache.nutch.api.impl.db.DbPageConverter
 
DbQueryResult - Class in org.apache.nutch.api.model.response
 
DbQueryResult() - Constructor for class org.apache.nutch.api.model.response.DbQueryResult
 
DbReader - Class in org.apache.nutch.api.impl.db
 
DbReader(Configuration, String) - Constructor for class org.apache.nutch.api.impl.db.DbReader
 
DbResource - Class in org.apache.nutch.api.resources
 
DbResource() - Constructor for class org.apache.nutch.api.resources.DbResource
 
DbUpdateMapper - Class in org.apache.nutch.crawl
 
DbUpdateMapper() - Constructor for class org.apache.nutch.crawl.DbUpdateMapper
 
DbUpdateReducer - Class in org.apache.nutch.crawl
 
DbUpdateReducer() - Constructor for class org.apache.nutch.crawl.DbUpdateReducer
 
DbUpdaterJob - Class in org.apache.nutch.crawl
 
DbUpdaterJob() - Constructor for class org.apache.nutch.crawl.DbUpdaterJob
 
DbUpdaterJob(Configuration) - Constructor for class org.apache.nutch.crawl.DbUpdaterJob
 
debug - Variable in class org.apache.nutch.tools.proxy.AbstractTestbedHandler
 
dedup(String) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
DEFAULT - Static variable in class org.apache.nutch.api.resources.ConfigResource
 
DEFAULT_BOOST - Static variable in class org.apache.nutch.util.domain.DomainSuffix
 
DEFAULT_DELAY - Static variable in class org.apache.nutch.tools.proxy.DelayHandler
 
DEFAULT_FILE_NAME - Static variable in class org.apache.nutch.collection.CollectionManager
 
DEFAULT_HOSTDB_CONCURRENCY_LEVEL - Static variable in class org.apache.nutch.host.HostDb
 
DEFAULT_LRU_SIZE - Static variable in class org.apache.nutch.host.HostDb
 
DEFAULT_PLUGIN - Static variable in class org.apache.nutch.parse.ParserFactory
Wildcard for default plugins.
DEFAULT_STATUS - Static variable in class org.apache.nutch.util.domain.DomainSuffix
 
DefaultFetchSchedule - Class in org.apache.nutch.crawl
This class implements the default re-fetch schedule.
DefaultFetchSchedule() - Constructor for class org.apache.nutch.crawl.DefaultFetchSchedule
 
defaultInterval - Variable in class org.apache.nutch.crawl.AbstractFetchSchedule
 
deflate(byte[]) - Static method in class org.apache.nutch.util.DeflateUtils
Returns a deflated copy of the input array.
DeflateUtils - Class in org.apache.nutch.util
A collection of utility methods for working on deflated data.
DeflateUtils() - Constructor for class org.apache.nutch.util.DeflateUtils
 
DelayHandler - Class in org.apache.nutch.tools.proxy
 
DelayHandler(int) - Constructor for class org.apache.nutch.tools.proxy.DelayHandler
 
delete(String) - Method in interface org.apache.nutch.api.ConfManager
 
delete(String) - Method in class org.apache.nutch.api.impl.RAMConfManager
 
delete(boolean) - Method in class org.apache.nutch.indexer.CleaningJob
 
delete(String) - Method in interface org.apache.nutch.indexer.IndexWriter
 
delete(String) - Method in class org.apache.nutch.indexer.IndexWriters
 
delete(String) - Method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
delete(String) - Method in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
delete(Long) - Method in class org.apache.nutch.webui.service.impl.SeedListServiceImpl
 
delete(Long) - Method in interface org.apache.nutch.webui.service.SeedListService
 
deleteConfig(String) - Method in class org.apache.nutch.api.resources.ConfigResource
 
deleteCrawl(Long) - Method in interface org.apache.nutch.webui.service.CrawlService
 
deleteCrawl(Long) - Method in class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
deleteMeta(String) - Method in class org.apache.nutch.scoring.ScoreDatum
 
deleteSubCollection(String) - Method in class org.apache.nutch.collection.CollectionManager
Delete named subcollection
describe() - Method in interface org.apache.nutch.indexer.IndexWriter
Returns a String describing the IndexWriter instance and the specific parameters it can take
describe() - Method in class org.apache.nutch.indexer.IndexWriters
 
describe() - Method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
describe() - Method in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
DESCRIPTION - Static variable in interface org.apache.nutch.metadata.DublinCore
An account of the content of the resource.
DIGEST_FIELD - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
DIGEST_FIELD - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
DIR_NAME - Static variable in class org.apache.nutch.protocol.Content
 
disconnect() - Method in class org.apache.nutch.protocol.ftp.Client
Closes the connection to the FTP server and restores connection parameters to the default values.
DISTANCE - Static variable in class org.apache.nutch.crawl.DbUpdaterJob
 
distributeScoreToOutlinks(String, WebPage, Collection<ScoreDatum>, int) - Method in class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
distributeScoreToOutlinks(String, WebPage, Collection<ScoreDatum>, int) - Method in class org.apache.nutch.scoring.opic.OPICScoringFilter
Get cash on hand, divide it by the number of outlinks and apply.
distributeScoreToOutlinks(String, WebPage, Collection<ScoreDatum>, int) - Method in interface org.apache.nutch.scoring.ScoringFilter
Distribute score value from the current page to all its outlinked pages.
distributeScoreToOutlinks(String, WebPage, Collection<ScoreDatum>, int) - Method in class org.apache.nutch.scoring.ScoringFilters
 
distributeScoreToOutlinks(String, WebPage, Collection<ScoreDatum>, int) - Method in class org.apache.nutch.scoring.tld.TLDScoringFilter
 
DmozParser - Class in org.apache.nutch.tools
Utility that converts DMOZ RDF into a flat file of URLs to be injected.
DmozParser() - Constructor for class org.apache.nutch.tools.DmozParser
 
documentCount - Static variable in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
doFilter(ServletRequest, ServletResponse, FilterChain) - Method in class org.apache.nutch.tools.proxy.LogDebugHandler
 
DomainStatistics - Class in org.apache.nutch.util.domain
Extracts some very basic statistics about domains from the crawldb
DomainStatistics() - Constructor for class org.apache.nutch.util.domain.DomainStatistics
 
DomainStatistics.DomainStatisticsCombiner - Class in org.apache.nutch.util.domain
 
DomainStatistics.DomainStatisticsCombiner() - Constructor for class org.apache.nutch.util.domain.DomainStatistics.DomainStatisticsCombiner
 
DomainStatistics.DomainStatisticsMapper - Class in org.apache.nutch.util.domain
 
DomainStatistics.DomainStatisticsMapper() - Constructor for class org.apache.nutch.util.domain.DomainStatistics.DomainStatisticsMapper
 
DomainStatistics.DomainStatisticsReducer - Class in org.apache.nutch.util.domain
 
DomainStatistics.DomainStatisticsReducer() - Constructor for class org.apache.nutch.util.domain.DomainStatistics.DomainStatisticsReducer
 
DomainStatistics.MyCounter - Enum in org.apache.nutch.util.domain
 
DomainSuffix - Class in org.apache.nutch.util.domain
This class represents the last part of the host name, which is operated by authorities, not individuals.
DomainSuffix(String, DomainSuffix.Status, float) - Constructor for class org.apache.nutch.util.domain.DomainSuffix
 
DomainSuffix(String) - Constructor for class org.apache.nutch.util.domain.DomainSuffix
 
DomainSuffix.Status - Enum in org.apache.nutch.util.domain
Enumeration of the status of the tld.
DomainSuffixes - Class in org.apache.nutch.util.domain
Storage class for DomainSuffix objects. Note: this class is a singleton.
DomainURLFilter - Class in org.apache.nutch.urlfilter.domain
Filters URLs based on a file containing domain suffixes, domain names, and hostnames.
DomainURLFilter() - Constructor for class org.apache.nutch.urlfilter.domain.DomainURLFilter
Default constructor.
DomainURLFilter(String) - Constructor for class org.apache.nutch.urlfilter.domain.DomainURLFilter
Constructor that specifies the domain file to use.
DOMBuilder - Class in org.apache.nutch.parse.html
This class takes SAX events (in addition to some extra events that SAX doesn't handle yet) and adds the result to a document or document fragment.
DOMBuilder(Document, Node) - Constructor for class org.apache.nutch.parse.html.DOMBuilder
DOMBuilder instance constructor...
DOMBuilder(Document, DocumentFragment) - Constructor for class org.apache.nutch.parse.html.DOMBuilder
DOMBuilder instance constructor...
DOMBuilder(Document) - Constructor for class org.apache.nutch.parse.html.DOMBuilder
DOMBuilder instance constructor...
DOMContentUtils - Class in org.apache.nutch.parse.html
A collection of methods for extracting content from DOM trees.
DOMContentUtils(Configuration) - Constructor for class org.apache.nutch.parse.html.DOMContentUtils
 
DOMContentUtils - Class in org.apache.nutch.parse.tika
A collection of methods for extracting content from DOM trees.
DOMContentUtils(Configuration) - Constructor for class org.apache.nutch.parse.tika.DOMContentUtils
 
DOMContentUtils.LinkParams - Class in org.apache.nutch.parse.html
 
DOMContentUtils.LinkParams(String, String, int) - Constructor for class org.apache.nutch.parse.html.DOMContentUtils.LinkParams
 
DomUtil - Class in org.apache.nutch.util
 
DomUtil() - Constructor for class org.apache.nutch.util.DomUtil
 
DublinCore - Interface in org.apache.nutch.metadata
A collection of Dublin Core metadata names.
DummySSLProtocolSocketFactory - Class in org.apache.nutch.protocol.httpclient
 
DummySSLProtocolSocketFactory() - Constructor for class org.apache.nutch.protocol.httpclient.DummySSLProtocolSocketFactory
Constructor for DummySSLProtocolSocketFactory.
DummyX509TrustManager - Class in org.apache.nutch.protocol.httpclient
 
DummyX509TrustManager(KeyStore) - Constructor for class org.apache.nutch.protocol.httpclient.DummyX509TrustManager
Constructor for DummyX509TrustManager.

E

elapsedTime(long, long) - Static method in class org.apache.nutch.util.TimingUtil
Calculate the elapsed time between two times specified in milliseconds.
ELASTIC_PREFIX - Static variable in interface org.apache.nutch.indexwriter.elastic.ElasticConstants
 
ElasticConstants - Interface in org.apache.nutch.indexwriter.elastic
 
ElasticIndexWriter - Class in org.apache.nutch.indexwriter.elastic
 
ElasticIndexWriter() - Constructor for class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
elName - Variable in class org.apache.nutch.parse.html.DOMContentUtils.LinkParams
 
EMPTY_BYTE_ARRAY - Static variable in class org.apache.nutch.util.Bytes
An empty instance.
EMPTY_RULES - Static variable in class org.apache.nutch.protocol.RobotRulesParser
A BaseRobotRules object appropriate for use when the robots.txt file is empty or missing; all requests are allowed.
EncodingDetector - Class in org.apache.nutch.util
A simple class for detecting character encodings.
EncodingDetector(Configuration) - Constructor for class org.apache.nutch.util.EncodingDetector
 
endCDATA() - Method in class org.apache.nutch.parse.html.DOMBuilder
Report the end of a CDATA section.
endDocument() - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of the end of a document.
endDTD() - Method in class org.apache.nutch.parse.html.DOMBuilder
Report the end of DTD declarations.
endElement(String, String, String) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of the end of an element.
endEntity(String) - Method in class org.apache.nutch.parse.html.DOMBuilder
Report the end of an entity.
endPrefixMapping(String) - Method in class org.apache.nutch.parse.html.DOMBuilder
End the scope of a prefix-URI mapping.
entityReference(String) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of an entityReference.
equals(Object) - Method in class org.apache.nutch.api.model.request.SeedList
 
equals(Object) - Method in class org.apache.nutch.api.model.request.SeedUrl
 
equals(Object) - Method in class org.apache.nutch.crawl.GeneratorJob.SelectorEntry
 
equals(Object) - Method in class org.apache.nutch.metadata.Metadata
 
equals(Object) - Method in class org.apache.nutch.parse.Outlink
 
equals(Object) - Method in class org.apache.nutch.plugin.PluginClassLoader
 
equals(Object) - Method in class org.apache.nutch.protocol.Content
 
equals(Object) - Method in class org.apache.nutch.protocol.httpclient.DummySSLProtocolSocketFactory
 
equals(byte[], byte[]) - Static method in class org.apache.nutch.util.Bytes
 
equals(Object) - Method in class org.apache.nutch.webui.model.SeedList
 
equals(Object) - Method in class org.apache.nutch.webui.model.SeedUrl
 
ErrorResponse - Class in org.apache.nutch.api.model.response
 
ErrorResponse(Throwable) - Constructor for class org.apache.nutch.api.model.response.ErrorResponse
 
ErrorStatusService - Class in org.apache.nutch.api.misc
 
ErrorStatusService() - Constructor for class org.apache.nutch.api.misc.ErrorStatusService
 
ESTIMATED_HEAP_TAX - Static variable in class org.apache.nutch.util.Bytes
Estimate of size cost to pay beyond payload in jvm for instance of byte [].
evaluate() - Method in class org.apache.nutch.util.CommandRunner
 
EXCEPTION - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Unspecified exception occurred.
exec() - Method in class org.apache.nutch.util.CommandRunner
 
executeCrawlCycle() - Method in class org.apache.nutch.webui.client.impl.CrawlingCycle
 
executeJob(JobConfig) - Method in class org.apache.nutch.webui.client.impl.NutchClientImpl
 
executeJob(JobConfig) - Method in interface org.apache.nutch.webui.client.NutchClient
 
executeRemoteJob(RemoteCommand) - Method in class org.apache.nutch.webui.client.impl.RemoteCommandExecutor
 
Extension - Class in org.apache.nutch.plugin
An Extension is a kind of listener descriptor that will be installed on a concrete ExtensionPoint that acts as kind of Publisher.
Extension(PluginDescriptor, String, String, String, Configuration, PluginRepository) - Constructor for class org.apache.nutch.plugin.Extension
 
ExtensionPoint - Class in org.apache.nutch.plugin
The ExtensionPoint provides meta information of an extension point.
ExtensionPoint(String, String, String) - Constructor for class org.apache.nutch.plugin.ExtensionPoint
Constructor

F

FAILED - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
General failure.
FAILED - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Content was not retrieved.
FAILED_EXCEPTION - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
Parsing failed.
FAILED_INVALID_FORMAT - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
Parsing failed.
FAILED_MISSING_CONTENT - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
Parsing failed.
FAILED_MISSING_PARTS - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
Parsing failed.
FAILED_TRUNCATED - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
Parsing failed.
FakeHandler - Class in org.apache.nutch.tools.proxy
 
FakeHandler(FakeHandler.Mode, FakeHandler.Mode, int, int, int, int) - Constructor for class org.apache.nutch.tools.proxy.FakeHandler
Create fake pages.
FakeHandler.Mode - Enum in org.apache.nutch.tools.proxy
Create links to hosts generated from a pool of numHosts/numPages random names.
Feed - Interface in org.apache.nutch.metadata
A collection of Feed property names extracted by the ROME library.
FEED - Static variable in interface org.apache.nutch.metadata.Feed
 
FEED_AUTHOR - Static variable in interface org.apache.nutch.metadata.Feed
 
FEED_PUBLISHED - Static variable in interface org.apache.nutch.metadata.Feed
 
FEED_TAGS - Static variable in interface org.apache.nutch.metadata.Feed
 
FEED_UPDATED - Static variable in interface org.apache.nutch.metadata.Feed
 
fetch(String, int, boolean, int) - Method in class org.apache.nutch.fetcher.FetcherJob
Run fetcher.
FETCH_STATUS_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
FETCH_TIME_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
FetchEntry - Class in org.apache.nutch.fetcher
 
FetchEntry() - Constructor for class org.apache.nutch.fetcher.FetchEntry
 
FetchEntry(Configuration, String, WebPage) - Constructor for class org.apache.nutch.fetcher.FetchEntry
 
FetcherJob - Class in org.apache.nutch.fetcher
Multi-threaded fetcher.
FetcherJob() - Constructor for class org.apache.nutch.fetcher.FetcherJob
 
FetcherJob(Configuration) - Constructor for class org.apache.nutch.fetcher.FetcherJob
 
FetcherJob.FetcherMapper - Class in org.apache.nutch.fetcher
Mapper class for Fetcher.
FetcherJob.FetcherMapper() - Constructor for class org.apache.nutch.fetcher.FetcherJob.FetcherMapper
 
FetcherReducer - Class in org.apache.nutch.fetcher
 
FetcherReducer() - Constructor for class org.apache.nutch.fetcher.FetcherReducer
 
FetchSchedule - Interface in org.apache.nutch.crawl
This interface defines the contract for implementations that manipulate fetch times and re-fetch intervals.
FetchScheduleFactory - Class in org.apache.nutch.crawl
Creates and caches a FetchSchedule implementation.
FIELD - Static variable in class org.creativecommons.nutch.CCIndexingFilter
The name of the document field we use.
FIELD_NAME - Static variable in class org.apache.nutch.indexer.subcollection.SubcollectionIndexingFilter
Doc field name
FieldPluggable - Interface in org.apache.nutch.plugin
 
File - Class in org.apache.nutch.protocol.file
This class is a protocol plugin used for file: scheme.
File() - Constructor for class org.apache.nutch.protocol.file.File
 
FileError - Exception in org.apache.nutch.protocol.file
Thrown for File error codes.
FileError(int) - Constructor for exception org.apache.nutch.protocol.file.FileError
 
FileException - Exception in org.apache.nutch.protocol.file
 
FileException() - Constructor for exception org.apache.nutch.protocol.file.FileException
 
FileException(String) - Constructor for exception org.apache.nutch.protocol.file.FileException
 
FileException(String, Throwable) - Constructor for exception org.apache.nutch.protocol.file.FileException
 
FileException(Throwable) - Constructor for exception org.apache.nutch.protocol.file.FileException
 
fileLen - Variable in class org.apache.nutch.tools.arc.ArcRecordReader
 
FileResponse - Class in org.apache.nutch.protocol.file
FileResponse.java mimics file replies as http response.
FileResponse(URL, WebPage, File, Configuration) - Constructor for class org.apache.nutch.protocol.file.FileResponse
 
filter(String, WebPage, Parse, HTMLMetaTags, DocumentFragment) - Method in class org.apache.nutch.analysis.lang.HTMLLanguageParser
Scan the HTML document looking at possible indications of content language.
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.analysis.lang.LanguageIndexingFilter
 
filter(String) - Method in class org.apache.nutch.collection.Subcollection
Simple "indexOf" filter for matching patterns.
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.indexer.anchor.AnchorIndexingFilter
The AnchorIndexingFilter filter object which supports boolean configuration settings for the deduplication of anchors.
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.indexer.basic.BasicIndexingFilter
The BasicIndexingFilter filter object which supports boolean configurable value for length of characters permitted within the title @see indexer.max.title.length in nutch-default.xml
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.indexer.html.HtmlIndexingFilter
 
filter(NutchDocument, String, WebPage) - Method in interface org.apache.nutch.indexer.IndexingFilter
Adds fields or otherwise modifies the document that will be indexed for a parse.
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.indexer.IndexingFilters
Run all defined filters.
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.indexer.metadata.MetadataIndexer
 
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.indexer.more.MoreIndexingFilter
 
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.indexer.subcollection.SubcollectionIndexingFilter
 
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.indexer.tld.TLDIndexingFilter
 
filter(NutchDocument, String, WebPage) - Method in class org.apache.nutch.microformats.reltag.RelTagIndexingFilter
The RelTagIndexingFilter filter object.
filter(String, WebPage, Parse, HTMLMetaTags, DocumentFragment) - Method in class org.apache.nutch.microformats.reltag.RelTagParser
 
filter(String) - Method in interface org.apache.nutch.net.URLFilter
 
filter(String) - Method in class org.apache.nutch.net.URLFilters
Run all defined filters.
filter(String, WebPage, Parse, HTMLMetaTags, DocumentFragment) - Method in class org.apache.nutch.parse.js.JSParseFilter
Scan the JavaScript looking for possible Outlinks.
filter(String, WebPage, Parse, HTMLMetaTags, DocumentFragment) - Method in class org.apache.nutch.parse.metatags.MetaTagsParser
 
filter(String, WebPage, Parse, HTMLMetaTags, DocumentFragment) - Method in interface org.apache.nutch.parse.ParseFilter
Adds metadata or otherwise modifies a parse, given the DOM tree of a page.
filter(String, WebPage, Parse, HTMLMetaTags, DocumentFragment) - Method in class org.apache.nutch.parse.ParseFilters
Run all defined filters.
filter(String) - Method in class org.apache.nutch.urlfilter.api.RegexURLFilterBase
 
filter(String) - Method in class org.apache.nutch.urlfilter.domain.DomainURLFilter
 
filter(String) - Method in class org.apache.nutch.urlfilter.prefix.PrefixURLFilter
 
filter(String) - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
filter(String) - Method in class org.apache.nutch.urlfilter.validator.UrlValidator
 
filter(NutchDocument, String, WebPage) - Method in class org.creativecommons.nutch.CCIndexingFilter
 
filter(String, WebPage, Parse, HTMLMetaTags, DocumentFragment) - Method in class org.creativecommons.nutch.CCParseFilter
Adds metadata or otherwise modifies a parse of an HTML document, given the DOM tree of a page.
finalize() - Method in class org.apache.nutch.plugin.Plugin
 
finalize() - Method in class org.apache.nutch.plugin.PluginRepository
 
finalize() - Method in class org.apache.nutch.protocol.ftp.Ftp
 
findAll() - Method in class org.apache.nutch.webui.service.impl.SeedListServiceImpl
 
findAll() - Method in interface org.apache.nutch.webui.service.SeedListService
 
findAuthentication(Metadata) - Method in class org.apache.nutch.protocol.httpclient.HttpAuthenticationFactory
 
findWorker(String) - Method in class org.apache.nutch.api.impl.NutchServerPoolExecutor
 
FORBID_ALL_RULES - Static variable in class org.apache.nutch.protocol.RobotRulesParser
A BaseRobotRules object appropriate for use when the robots.txt file is not fetched due to a 403/Forbidden response; all requests are disallowed.
forceRefetch(String, WebPage, boolean) - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
This method resets fetchTime, fetchInterval, modifiedTime, retriesSinceFetch and page signature, so that it forces refetching.
forceRefetch(String, WebPage, boolean) - Method in interface org.apache.nutch.crawl.FetchSchedule
This method resets fetchTime, fetchInterval, modifiedTime and page signature, so that it forces refetching.
FORMAT - Static variable in interface org.apache.nutch.metadata.DublinCore
Typically, Format may include the media-type or dimensions of the resource.
format - Static variable in class org.apache.nutch.net.protocols.HttpDateFormat
 
forName(String) - Method in class org.apache.nutch.util.MimeUtil
A facade interface to Tika's underlying MimeTypes.forName(String) method.
fromHexString(String) - Static method in class org.apache.nutch.util.StringUtil
Convert a String containing consecutive (no inside whitespace) hexadecimal digits into a corresponding byte array.
FSUtils - Class in org.apache.nutch.util
Utility methods for common filesystem operations.
FSUtils() - Constructor for class org.apache.nutch.util.FSUtils
 
Ftp - Class in org.apache.nutch.protocol.ftp
This class is a protocol plugin used for ftp: scheme.
Ftp() - Constructor for class org.apache.nutch.protocol.ftp.Ftp
 
FtpError - Exception in org.apache.nutch.protocol.ftp
Thrown for Ftp error codes.
FtpError(int) - Constructor for exception org.apache.nutch.protocol.ftp.FtpError
 
FtpException - Exception in org.apache.nutch.protocol.ftp
Superclass for important exceptions thrown during FTP talk, that must be handled with care.
FtpException() - Constructor for exception org.apache.nutch.protocol.ftp.FtpException
 
FtpException(String) - Constructor for exception org.apache.nutch.protocol.ftp.FtpException
 
FtpException(String, Throwable) - Constructor for exception org.apache.nutch.protocol.ftp.FtpException
 
FtpException(Throwable) - Constructor for exception org.apache.nutch.protocol.ftp.FtpException
 
FtpExceptionBadSystResponse - Exception in org.apache.nutch.protocol.ftp
Exception indicating bad reply of SYST command.
FtpExceptionCanNotHaveDataConnection - Exception in org.apache.nutch.protocol.ftp
Exception indicating failure of opening data connection.
FtpExceptionControlClosedByForcedDataClose - Exception in org.apache.nutch.protocol.ftp
Exception indicating control channel is closed by server end, due to forced closure of data channel at client (our) end.
FtpExceptionUnknownForcedDataClose - Exception in org.apache.nutch.protocol.ftp
Exception indicating unrecognizable reply from server after forced closure of data channel by client (our) side.
FtpResponse - Class in org.apache.nutch.protocol.ftp
FtpResponse.java mimics ftp replies as http response.
FtpResponse(URL, WebPage, Ftp, Configuration) - Constructor for class org.apache.nutch.protocol.ftp.FtpResponse
 
FtpRobotRulesParser - Class in org.apache.nutch.protocol.ftp
This class is used for parsing robots for urls belonging to FTP protocol.
FtpRobotRulesParser(Configuration) - Constructor for class org.apache.nutch.protocol.ftp.FtpRobotRulesParser
 

G

generate(long, long, boolean, boolean) - Method in class org.apache.nutch.crawl.GeneratorJob
Mark URLs ready for fetching.
GENERATE_COUNT - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATE_TIME_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
GENERATE_UPDATE_CRAWLDB - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_COUNT_MODE - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_COUNT_VALUE_DOMAIN - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_COUNT_VALUE_HOST - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_COUNT_VALUE_IP - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_CUR_TIME - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_DELAY - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_FILTER - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_MAX_COUNT - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_MIN_SCORE - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_NORMALISE - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_RANDOM_SEED - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GENERATOR_TOP_N - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
GeneratorJob - Class in org.apache.nutch.crawl
 
GeneratorJob() - Constructor for class org.apache.nutch.crawl.GeneratorJob
 
GeneratorJob(Configuration) - Constructor for class org.apache.nutch.crawl.GeneratorJob
 
GeneratorJob.SelectorEntry - Class in org.apache.nutch.crawl
 
GeneratorJob.SelectorEntry() - Constructor for class org.apache.nutch.crawl.GeneratorJob.SelectorEntry
 
GeneratorJob.SelectorEntry(String, float) - Constructor for class org.apache.nutch.crawl.GeneratorJob.SelectorEntry
 
GeneratorJob.SelectorEntryComparator - Class in org.apache.nutch.crawl
 
GeneratorJob.SelectorEntryComparator() - Constructor for class org.apache.nutch.crawl.GeneratorJob.SelectorEntryComparator
 
GeneratorMapper - Class in org.apache.nutch.crawl
 
GeneratorMapper() - Constructor for class org.apache.nutch.crawl.GeneratorMapper
 
GeneratorReducer - Class in org.apache.nutch.crawl
Reduce class for generate. The #reduce() method writes a random integer to all generated URLs.
GeneratorReducer() - Constructor for class org.apache.nutch.crawl.GeneratorReducer
 
generatorSortValue(String, WebPage, float) - Method in class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
generatorSortValue(String, WebPage, float) - Method in class org.apache.nutch.scoring.opic.OPICScoringFilter
 
generatorSortValue(String, WebPage, float) - Method in interface org.apache.nutch.scoring.ScoringFilter
This method prepares a sort value for the purpose of sorting and selecting top N scoring pages during fetchlist generation.
generatorSortValue(String, WebPage, float) - Method in class org.apache.nutch.scoring.ScoringFilters
Calculate a sort value for Generate.
generatorSortValue(String, WebPage, float) - Method in class org.apache.nutch.scoring.tld.TLDScoringFilter
 
GenericWritableConfigurable - Class in org.apache.nutch.util
A generic Writable wrapper that can inject Configuration to Configurables
GenericWritableConfigurable() - Constructor for class org.apache.nutch.util.GenericWritableConfigurable
 
get(String) - Method in interface org.apache.nutch.api.ConfManager
 
get(String) - Method in class org.apache.nutch.api.impl.RAMConfManager
 
get(String, String) - Method in class org.apache.nutch.api.impl.RAMJobManager
 
get(String, String) - Method in interface org.apache.nutch.api.JobManager
 
get(String) - Method in class org.apache.nutch.host.HostDb
 
get(String) - Method in class org.apache.nutch.metadata.Metadata
Get the value associated to a metadata name.
get(String) - Method in class org.apache.nutch.metadata.SpellCheckedMetadata
 
get(Configuration) - Static method in class org.apache.nutch.plugin.PluginRepository
 
get(int) - Method in class org.apache.nutch.storage.Host
 
get(int) - Method in class org.apache.nutch.storage.ParseStatus
 
get(int) - Method in class org.apache.nutch.storage.ProtocolStatus
 
get(int) - Method in class org.apache.nutch.storage.WebPage
 
get(String) - Method in class org.apache.nutch.util.domain.DomainSuffixes
Returns the DomainSuffix object for the extension; if the extension is a top-level domain, the returned object will be an instance of TopLevelDomain.
get(Configuration) - Static method in class org.apache.nutch.util.ObjectCache
 
getAccept() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getAcceptedIssuers() - Method in class org.apache.nutch.protocol.httpclient.DummyX509TrustManager
 
getAcceptLanguage() - Method in class org.apache.nutch.protocol.http.api.HttpBase
Value of "Accept-Language" request header sent by Nutch.
getAliases() - Method in class org.apache.nutch.parse.ParsePluginList
 
getAll() - Method in class org.apache.nutch.collection.CollectionManager
Returns all collections
getAllJobs() - Method in class org.apache.nutch.api.impl.NutchServerPoolExecutor
 
getAnchor() - Method in class org.apache.nutch.parse.Outlink
 
getAnchor() - Method in class org.apache.nutch.scoring.ScoreDatum
 
getArg(ParseStatus, int) - Static method in class org.apache.nutch.parse.ParseStatusUtils
 
getArgs() - Method in class org.apache.nutch.api.model.request.JobConfig
 
getArgs() - Method in class org.apache.nutch.api.model.response.JobInfo
 
getArgs() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Gets the value of the 'args' field
getArgs() - Method in class org.apache.nutch.storage.ParseStatus
Gets the value of the 'args' field.
getArgs() - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Gets the value of the 'args' field.
getArgs() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Gets the value of the 'args' field
getArgs() - Method in class org.apache.nutch.storage.ProtocolStatus
Gets the value of the 'args' field.
getArgs() - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Gets the value of the 'args' field.
getArgs() - Method in class org.apache.nutch.webui.client.model.JobConfig
 
getArgs() - Method in class org.apache.nutch.webui.client.model.JobInfo
 
getAsMap(String) - Method in interface org.apache.nutch.api.ConfManager
 
getAsMap(String) - Method in class org.apache.nutch.api.impl.RAMConfManager
 
getAsyncExecutor() - Method in class org.apache.nutch.webui.config.SpringConfiguration
 
getAttribute(String) - Method in class org.apache.nutch.plugin.Extension
Returns an attribute value that is set up in the manifest file and is defined by the extension point XML schema.
getAuthentication(String, Configuration) - Static method in class org.apache.nutch.protocol.httpclient.HttpBasicAuthentication
This method is responsible for providing Basic authentication information.
getBase(Node) - Method in class org.apache.nutch.parse.html.DOMContentUtils
If Node contains a BASE tag then its HREF is returned.
getBaseHref() - Method in class org.apache.nutch.parse.HTMLMetaTags
A convenience method.
getBaseUrl() - Method in class org.apache.nutch.protocol.Content
The base url for relative links contained in the content.
getBaseUrl() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'baseUrl' field
getBaseUrl() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'baseUrl' field.
getBaseUrl() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'baseUrl' field.
getBasicPattern() - Static method in class org.apache.nutch.protocol.httpclient.HttpBasicAuthentication
Provides a pattern which can be used by an outside resource to determine if this class can provide credentials based on simple header information.
getBatchId() - Method in class org.apache.nutch.api.model.request.DbFilter
 
getBatchId() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'batchId' field
getBatchId() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'batchId' field.
getBatchId() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'batchId' field.
getBlackListString() - Method in class org.apache.nutch.collection.Subcollection
Returns blacklist String
getBoost() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord
 
getBoost() - Method in class org.apache.nutch.util.domain.DomainSuffix
 
getBuilder(String) - Static method in class org.apache.nutch.webui.pages.components.ColorEnumLabel
 
getByHostName(String) - Method in class org.apache.nutch.host.HostDb
 
getCachedClass(PluginDescriptor, String) - Method in class org.apache.nutch.plugin.PluginRepository
 
getCacheKey(URL) - Static method in class org.apache.nutch.protocol.http.api.HttpRobotRulesParser
Compose unique key to store and access robot rules in cache for given URL
getClasses() - Method in class org.apache.nutch.api.NutchServer
 
getClassLoader() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns a cached classloader for a plugin.
getClazz() - Method in class org.apache.nutch.plugin.Extension
Returns the full class name of the extension point implementation
getClient(NutchInstance) - Method in class org.apache.nutch.webui.client.NutchClientFactory
 
getCode() - Method in interface org.apache.nutch.net.protocols.Response
Returns the response code.
getCode(int) - Method in exception org.apache.nutch.protocol.file.FileError
 
getCode() - Method in class org.apache.nutch.protocol.file.FileResponse
Returns the response code.
getCode(int) - Method in exception org.apache.nutch.protocol.ftp.FtpError
 
getCode() - Method in class org.apache.nutch.protocol.ftp.FtpResponse
Returns the response code.
getCode() - Method in class org.apache.nutch.protocol.http.HttpResponse
 
getCode() - Method in class org.apache.nutch.protocol.httpclient.HttpResponse
 
getCode() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Gets the value of the 'code' field
getCode() - Method in class org.apache.nutch.storage.ProtocolStatus
Gets the value of the 'code' field.
getCode() - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Gets the value of the 'code' field.
getCollectionManager(Configuration) - Static method in class org.apache.nutch.collection.CollectionManager
 
getCommand() - Method in class org.apache.nutch.util.CommandRunner
 
getConf() - Method in class org.apache.nutch.analysis.lang.HTMLLanguageParser
 
getConf() - Method in class org.apache.nutch.analysis.lang.LanguageIndexingFilter
 
getConf() - Method in class org.apache.nutch.crawl.URLPartitioner.FetchEntryPartitioner
 
getConf() - Method in class org.apache.nutch.crawl.URLPartitioner
 
getConf() - Method in class org.apache.nutch.crawl.URLPartitioner.SelectorEntryPartitioner
 
getConf() - Method in class org.apache.nutch.host.HostDbUpdateJob
 
getConf() - Method in class org.apache.nutch.host.HostInjectorJob
 
getConf() - Method in class org.apache.nutch.indexer.anchor.AnchorIndexingFilter
Get the Configuration object
getConf() - Method in class org.apache.nutch.indexer.basic.BasicIndexingFilter
Get the Configuration object
getConf() - Method in class org.apache.nutch.indexer.CleaningJob
 
getConf() - Method in class org.apache.nutch.indexer.html.HtmlIndexingFilter
 
getConf() - Method in class org.apache.nutch.indexer.IndexingFiltersChecker
 
getConf() - Method in class org.apache.nutch.indexer.metadata.MetadataIndexer
 
getConf() - Method in class org.apache.nutch.indexer.more.MoreIndexingFilter
 
getConf() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
getConf() - Method in class org.apache.nutch.indexer.tld.TLDIndexingFilter
 
getConf() - Method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
getConf() - Method in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
getConf() - Method in class org.apache.nutch.microformats.reltag.RelTagIndexingFilter
Get the Configuration object
getConf() - Method in class org.apache.nutch.microformats.reltag.RelTagParser
Get the Configuration object
getConf() - Method in class org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer
 
getConf() - Method in class org.apache.nutch.parse.html.HtmlParser
 
getConf() - Method in class org.apache.nutch.parse.js.JSParseFilter
Get the Configuration object
getConf() - Method in class org.apache.nutch.parse.metatags.MetaTagsParser
 
getConf() - Method in class org.apache.nutch.parse.ParserChecker
 
getConf() - Method in class org.apache.nutch.parse.ParserJob
 
getConf() - Method in class org.apache.nutch.parse.ParseUtil
 
getConf() - Method in class org.apache.nutch.parse.tika.TikaParser
 
getConf() - Method in class org.apache.nutch.protocol.file.File
Get the Configuration object
getConf() - Method in class org.apache.nutch.protocol.ftp.Ftp
Get the Configuration object
getConf() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getConf() - Method in class org.apache.nutch.protocol.httpclient.HttpAuthenticationFactory
 
getConf() - Method in class org.apache.nutch.protocol.httpclient.HttpBasicAuthentication
 
getConf() - Method in class org.apache.nutch.protocol.RobotRulesParser
Get the Configuration object
getConf() - Method in class org.apache.nutch.protocol.sftp.Sftp
Get the Configuration object
getConf() - Method in class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
getConf() - Method in class org.apache.nutch.scoring.opic.OPICScoringFilter
 
getConf() - Method in class org.apache.nutch.scoring.tld.TLDScoringFilter
 
getConf() - Method in class org.apache.nutch.urlfilter.api.RegexURLFilterBase
 
getConf() - Method in class org.apache.nutch.urlfilter.domain.DomainURLFilter
 
getConf() - Method in class org.apache.nutch.urlfilter.prefix.PrefixURLFilter
 
getConf() - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
getConf() - Method in class org.apache.nutch.urlfilter.validator.UrlValidator
 
getConf() - Method in class org.apache.nutch.util.domain.DomainStatistics
 
getConf() - Method in class org.apache.nutch.util.GenericWritableConfigurable
 
getConf() - Method in class org.creativecommons.nutch.CCIndexingFilter
 
getConf() - Method in class org.creativecommons.nutch.CCParseFilter
 
getConfId() - Method in class org.apache.nutch.api.model.request.JobConfig
 
getConfId() - Method in class org.apache.nutch.api.model.response.JobInfo
 
getConfId() - Method in class org.apache.nutch.webui.client.model.JobConfig
 
getConfId() - Method in class org.apache.nutch.webui.client.model.JobInfo
 
getConfig(String) - Method in class org.apache.nutch.api.resources.ConfigResource
 
getConfigId() - Method in class org.apache.nutch.api.model.request.NutchConfig
 
getConfigs() - Method in class org.apache.nutch.api.resources.ConfigResource
 
getConfiguration() - Method in class org.apache.nutch.api.model.response.NutchStatus
 
getConfiguration() - Method in class org.apache.nutch.webui.client.model.NutchStatus
 
getConfMgr() - Method in class org.apache.nutch.api.NutchServer
 
getConnectionSource() - Method in class org.apache.nutch.webui.config.SpringConfiguration
 
getConnectionStatus() - Method in class org.apache.nutch.webui.client.impl.NutchClientImpl
 
getConnectionStatus() - Method in interface org.apache.nutch.webui.client.NutchClient
 
getConnectionStatus() - Method in class org.apache.nutch.webui.model.NutchInstance
 
getConnectionStatus(Long) - Method in class org.apache.nutch.webui.service.impl.NutchServiceImpl
 
getConnectionStatus(Long) - Method in interface org.apache.nutch.webui.service.NutchService
 
getContent() - Method in interface org.apache.nutch.net.protocols.Response
Returns the full content of the response.
getContent() - Method in class org.apache.nutch.protocol.Content
The binary content retrieved.
getContent() - Method in class org.apache.nutch.protocol.file.FileResponse
 
getContent() - Method in class org.apache.nutch.protocol.ftp.FtpResponse
 
getContent() - Method in class org.apache.nutch.protocol.http.HttpResponse
 
getContent() - Method in class org.apache.nutch.protocol.httpclient.HttpResponse
 
getContent() - Method in class org.apache.nutch.protocol.ProtocolOutput
 
getContent() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'content' field
getContent() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'content' field.
getContent() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'content' field.
getContentType() - Method in exception org.apache.nutch.parse.ParserNotFound
 
getContentType() - Method in class org.apache.nutch.protocol.Content
The media type of the retrieved content.
getContentType() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'contentType' field
getContentType() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'contentType' field.
getContentType() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'contentType' field.
getCopyMap() - Method in class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
getCount(E) - Method in class org.apache.nutch.util.Histogram
 
getCountryName() - Method in class org.apache.nutch.util.domain.TopLevelDomain
Returns the country name if TLD is Country Code TLD
getCrawlDelay() - Method in interface org.apache.nutch.protocol.RobotRules
Get Crawl-Delay, in milliseconds.
getCrawlId() - Method in class org.apache.nutch.api.model.request.JobConfig
 
getCrawlId() - Method in class org.apache.nutch.api.model.response.JobInfo
 
getCrawlId() - Method in class org.apache.nutch.webui.client.model.Crawl
 
getCrawlId() - Method in class org.apache.nutch.webui.client.model.JobConfig
 
getCrawlId() - Method in class org.apache.nutch.webui.client.model.JobInfo
 
getCrawlName() - Method in class org.apache.nutch.webui.client.model.Crawl
 
getCrawls() - Method in interface org.apache.nutch.webui.service.CrawlService
 
getCrawls() - Method in class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
getCreatedDaos() - Method in class org.apache.nutch.webui.config.CustomDaoFactory
 
getCredentials() - Method in interface org.apache.nutch.protocol.httpclient.HttpAuthentication
Gets the credentials generated by the HttpAuthentication object.
getCredentials() - Method in class org.apache.nutch.protocol.httpclient.HttpBasicAuthentication
Gets the Basic credentials generated by this HttpBasicAuthentication object
getCurrentInstance() - Method in class org.apache.nutch.webui.pages.AbstractBasePage
 
getCurrentKey() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecordReader
 
getCurrentNode() - Method in class org.apache.nutch.parse.html.DOMBuilder
Get the node currently being processed.
getCurrentNode() - Method in class org.apache.nutch.util.NodeWalker
Return the current node.
getCurrentValue() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecordReader
 
getDaoFactory() - Method in class org.apache.nutch.webui.config.SpringConfiguration
 
getDataStoreClass(Configuration) - Static method in class org.apache.nutch.storage.StorageUtils
Return the Persistent Gora class used to persist Nutch Web data.
getDatum() - Method in class org.apache.nutch.crawl.URLWebPage
 
getDependencies() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns an array of plugin ids.
getDescriptor() - Method in class org.apache.nutch.plugin.Extension
Returns the plugin descriptor.
getDescriptor() - Method in class org.apache.nutch.plugin.Plugin
Returns the plugin descriptor
getDistance() - Method in class org.apache.nutch.scoring.ScoreDatum
 
getDocBegin() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputSplit
 
getDocumentMeta() - Method in class org.apache.nutch.indexer.NutchDocument
 
getDom(InputStream) - Static method in class org.apache.nutch.util.DomUtil
Returns the parsed DOM tree, or null if any error occurs.
getDomain() - Method in class org.apache.nutch.util.domain.DomainSuffix
 
getDomainName(URL) - Static method in class org.apache.nutch.util.URLUtil
Returns the domain name of the url.
getDomainName(String) - Static method in class org.apache.nutch.util.URLUtil
Returns the domain name of the url.
getDomainSuffix(URL) - Static method in class org.apache.nutch.util.URLUtil
Returns the DomainSuffix corresponding to the last public part of the hostname
getDomainSuffix(String) - Static method in class org.apache.nutch.util.URLUtil
Returns the DomainSuffix corresponding to the last public part of the hostname
getEmptyParse(Exception, Configuration) - Static method in class org.apache.nutch.parse.ParseStatusUtils
 
getEmptyParse(int, String, Configuration) - Static method in class org.apache.nutch.parse.ParseStatusUtils
 
getEndKey() - Method in class org.apache.nutch.api.model.request.DbFilter
 
getException() - Method in class org.apache.nutch.api.model.response.ErrorResponse
 
getExitValue() - Method in class org.apache.nutch.util.CommandRunner
 
getExpireTime() - Method in interface org.apache.nutch.protocol.RobotRules
Get expire time
getExportedLibUrls() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns an array of exported libraries as URLs.
getExtensionInstance() - Method in class org.apache.nutch.plugin.Extension
Returns an instance of the extension implementation.
getExtensionPoint(String) - Method in class org.apache.nutch.plugin.PluginRepository
Returns an extension point identified by an extension point id.
getExtensions(String) - Method in class org.apache.nutch.parse.ParserFactory
Finds the best-suited parse plugin for a given contentType.
getExtensions() - Method in class org.apache.nutch.plugin.ExtensionPoint
Returns an array of extensions that listen to this extension point.
getExtensions() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns an array of extensions.
getExtenstionPoints() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns an array of extension points.
getFetchInterval() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'fetchInterval' field
getFetchInterval() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'fetchInterval' field.
getFetchInterval() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'fetchInterval' field.
getFetchSchedule(Configuration) - Static method in class org.apache.nutch.crawl.FetchScheduleFactory
Return the FetchSchedule implementation.
getFetchTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'fetchTime' field
getFetchTime() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'fetchTime' field.
getFetchTime() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'fetchTime' field.
getFieldNames() - Method in class org.apache.nutch.indexer.NutchDocument
 
getFields() - Method in class org.apache.nutch.analysis.lang.HTMLLanguageParser
 
getFields() - Method in class org.apache.nutch.analysis.lang.LanguageIndexingFilter
 
getFields() - Method in class org.apache.nutch.api.model.request.DbFilter
 
getFields() - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
 
getFields() - Method in interface org.apache.nutch.crawl.FetchSchedule
 
getFields(Job) - Method in class org.apache.nutch.crawl.GeneratorJob
 
getFields() - Method in class org.apache.nutch.crawl.MD5Signature
 
getFields() - Method in class org.apache.nutch.crawl.Signature
 
getFields(Configuration) - Static method in class org.apache.nutch.crawl.SignatureFactory
 
getFields() - Method in class org.apache.nutch.crawl.TextMD5Signature
 
getFields() - Method in class org.apache.nutch.crawl.TextProfileSignature
 
getFields(Job) - Method in class org.apache.nutch.fetcher.FetcherJob
 
getFields() - Method in class org.apache.nutch.indexer.anchor.AnchorIndexingFilter
Gets all the fields for a given WebPage. Many datastores need to set up the mapreduce job by specifying the fields needed.
getFields() - Method in class org.apache.nutch.indexer.basic.BasicIndexingFilter
Gets all the fields for a given WebPage. Many datastores need to set up the mapreduce job by specifying the fields needed.
getFields(Job) - Method in class org.apache.nutch.indexer.CleaningJob
 
getFields() - Method in class org.apache.nutch.indexer.html.HtmlIndexingFilter
 
getFields() - Method in class org.apache.nutch.indexer.IndexCleaningFilters
 
getFields() - Method in class org.apache.nutch.indexer.IndexingFilters
Gets all the fields for a given WebPage. Many datastores need to set up the mapreduce job by specifying the fields needed.
getFields() - Method in class org.apache.nutch.indexer.metadata.MetadataIndexer
 
getFields() - Method in class org.apache.nutch.indexer.more.MoreIndexingFilter
 
getFields() - Method in class org.apache.nutch.indexer.subcollection.SubcollectionIndexingFilter
 
getFields() - Method in class org.apache.nutch.indexer.tld.TLDIndexingFilter
 
getFields() - Method in class org.apache.nutch.microformats.reltag.RelTagIndexingFilter
Gets all the fields for a given WebPage. Many datastores need to set up the mapreduce job by specifying the fields needed.
getFields() - Method in class org.apache.nutch.microformats.reltag.RelTagParser
Gets all the fields for a given WebPage. Many datastores need to set up the mapreduce job by specifying the fields needed.
getFields() - Method in class org.apache.nutch.parse.html.HtmlParser
 
getFields() - Method in class org.apache.nutch.parse.js.JSParseFilter
Gets all the fields for a given WebPage. Many datastores need to set up the mapreduce job by specifying the fields needed.
getFields() - Method in class org.apache.nutch.parse.metatags.MetaTagsParser
 
getFields() - Method in class org.apache.nutch.parse.ParseFilters
 
getFields() - Method in class org.apache.nutch.parse.ParserFactory
 
getFields(Job) - Method in class org.apache.nutch.parse.ParserJob
 
getFields() - Method in class org.apache.nutch.parse.tika.TikaParser
 
getFields() - Method in interface org.apache.nutch.plugin.FieldPluggable
 
getFields() - Method in class org.apache.nutch.protocol.file.File
 
getFields() - Method in class org.apache.nutch.protocol.ftp.Ftp
 
getFields() - Method in class org.apache.nutch.protocol.http.Http
 
getFields() - Method in class org.apache.nutch.protocol.httpclient.Http
 
getFields() - Method in class org.apache.nutch.protocol.ProtocolFactory
 
getFields() - Method in class org.apache.nutch.protocol.sftp.Sftp
 
getFields() - Method in class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
getFields() - Method in class org.apache.nutch.scoring.opic.OPICScoringFilter
 
getFields() - Method in class org.apache.nutch.scoring.ScoringFilters
 
getFields() - Method in class org.apache.nutch.scoring.tld.TLDScoringFilter
 
getFields() - Method in class org.creativecommons.nutch.CCIndexingFilter
 
getFields() - Method in class org.creativecommons.nutch.CCParseFilter
 
getFieldsCount() - Method in class org.apache.nutch.storage.Host
Gets the total field count.
getFieldsCount() - Method in class org.apache.nutch.storage.ParseStatus
Gets the total field count.
getFieldsCount() - Method in class org.apache.nutch.storage.ProtocolStatus
Gets the total field count.
getFieldsCount() - Method in class org.apache.nutch.storage.WebPage
Gets the total field count.
getFieldValue(String) - Method in class org.apache.nutch.indexer.NutchDocument
 
getFieldValues(String) - Method in class org.apache.nutch.indexer.NutchDocument
 
getFirst() - Method in class org.apache.nutch.util.Pair
 
getFParsePluginsFile() - Method in class org.apache.nutch.parse.ParsePluginsReader
 
getGeneralTags() - Method in class org.apache.nutch.parse.HTMLMetaTags
Returns all collected values of the general meta tags.
getHeader(String) - Method in interface org.apache.nutch.net.protocols.Response
Returns the value of a named header.
getHeader(String) - Method in class org.apache.nutch.protocol.file.FileResponse
Returns the value of a named header.
getHeader(String) - Method in class org.apache.nutch.protocol.ftp.FtpResponse
Returns the value of a named header.
getHeader(String) - Method in class org.apache.nutch.protocol.http.HttpResponse
 
getHeader(String) - Method in class org.apache.nutch.protocol.httpclient.HttpResponse
 
getHeaders() - Method in interface org.apache.nutch.net.protocols.Response
Returns all the headers.
getHeaders() - Method in class org.apache.nutch.protocol.http.HttpResponse
 
getHeaders() - Method in class org.apache.nutch.protocol.httpclient.HttpResponse
 
getHeaders() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'headers' field
getHeaders() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'headers' field.
getHeaders() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'headers' field.
getHomePage() - Method in class org.apache.nutch.webui.NutchUiApplication
 
getHost(String) - Static method in class org.apache.nutch.util.URLUtil
Returns the lowercased hostname for the url or null if the url is not well formed.
getHost() - Method in class org.apache.nutch.webui.model.NutchInstance
 
getHostBatches(URL) - Static method in class org.apache.nutch.util.URLUtil
Partitions of the hostname of the url by "."
getHostBatches(String) - Static method in class org.apache.nutch.util.URLUtil
Partitions of the hostname of the url by "."
getHttpEquivTags() - Method in class org.apache.nutch.parse.HTMLMetaTags
Returns all collected values of the "http-equiv" meta tags.
getHttpSolrServer(Configuration) - Static method in class org.apache.nutch.indexer.solr.SolrUtils
 
getHttpSolrServer(Configuration) - Static method in class org.apache.nutch.indexwriter.solr.SolrUtils
 
getId() - Method in class org.apache.nutch.api.model.request.SeedList
 
getId() - Method in class org.apache.nutch.api.model.request.SeedUrl
 
getId() - Method in class org.apache.nutch.api.model.response.JobInfo
 
getId() - Method in class org.apache.nutch.collection.Subcollection
 
getId() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord
 
getId() - Method in class org.apache.nutch.plugin.Extension
Return the unique id of the extension.
getId() - Method in class org.apache.nutch.plugin.ExtensionPoint
Returns the unique id of the extension point.
getId() - Method in class org.apache.nutch.webui.client.model.Crawl
 
getId() - Method in class org.apache.nutch.webui.client.model.JobInfo
 
getId() - Method in class org.apache.nutch.webui.model.NutchInstance
 
getId() - Method in class org.apache.nutch.webui.model.SeedList
 
getId() - Method in class org.apache.nutch.webui.model.SeedUrl
 
getIndex() - Method in enum org.apache.nutch.storage.Host.Field
Gets field's index.
getIndex() - Method in enum org.apache.nutch.storage.ParseStatus.Field
Gets field's index.
getIndex() - Method in enum org.apache.nutch.storage.ProtocolStatus.Field
Gets field's index.
getIndex() - Method in enum org.apache.nutch.storage.WebPage.Field
Gets field's index.
getInfo() - Method in class org.apache.nutch.api.impl.JobWorker
 
getInfo(String) - Method in class org.apache.nutch.api.impl.NutchServerPoolExecutor
 
getInfo(String, String) - Method in class org.apache.nutch.api.resources.JobResource
 
getInlinks() - Method in class org.apache.nutch.storage.Host.Builder
Gets the value of the 'inlinks' field
getInlinks() - Method in class org.apache.nutch.storage.Host
Gets the value of the 'inlinks' field.
getInlinks() - Method in class org.apache.nutch.storage.Host.Tombstone
Gets the value of the 'inlinks' field.
getInlinks() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'inlinks' field
getInlinks() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'inlinks' field.
getInlinks() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'inlinks' field.
getInstance(Configuration) - Static method in class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
getInstance() - Static method in class org.apache.nutch.util.domain.DomainSuffixes
Singleton instance, lazy instantiation.
getInstance(Configuration) - Static method in class org.apache.nutch.util.NutchJob
Creates a new NutchJob with no particular Cluster and a given Configuration.
getInstance(Configuration, String) - Static method in class org.apache.nutch.util.NutchJob
Creates a new NutchJob with no particular Cluster and a given jobName.
getInstance(Long) - Method in class org.apache.nutch.webui.service.impl.NutchInstanceServiceImpl
 
getInstance(Long) - Method in interface org.apache.nutch.webui.service.NutchInstanceService
 
getInstances() - Method in class org.apache.nutch.webui.config.NutchGuiConfiguration
 
getInstances() - Method in class org.apache.nutch.webui.service.impl.NutchInstanceServiceImpl
 
getInstances() - Method in interface org.apache.nutch.webui.service.NutchInstanceService
 
getInt(String, int) - Method in class org.apache.nutch.storage.Host
 
getJobClassName() - Method in class org.apache.nutch.api.model.request.JobConfig
 
getJobClassName() - Method in class org.apache.nutch.webui.client.model.JobConfig
 
getJobConfig() - Method in class org.apache.nutch.webui.client.impl.RemoteCommand
 
getJobHistory() - Method in class org.apache.nutch.api.impl.NutchServerPoolExecutor
 
getJobInfo(String) - Method in class org.apache.nutch.webui.client.impl.NutchClientImpl
 
getJobInfo() - Method in class org.apache.nutch.webui.client.impl.RemoteCommand
 
getJobInfo(String) - Method in interface org.apache.nutch.webui.client.NutchClient
 
getJobMgr() - Method in class org.apache.nutch.api.NutchServer
 
getJobRunning() - Method in class org.apache.nutch.api.impl.NutchServerPoolExecutor
 
getJobs() - Method in class org.apache.nutch.api.model.response.NutchStatus
 
getJobs(String) - Method in class org.apache.nutch.api.resources.JobResource
 
getJobs() - Method in class org.apache.nutch.webui.client.model.NutchStatus
 
getKey() - Method in class org.apache.nutch.fetcher.FetchEntry
 
getKeyMap() - Method in class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
getKeys() - Method in class org.apache.nutch.util.Histogram
 
getLastModified() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Gets the value of the 'lastModified' field
getLastModified() - Method in class org.apache.nutch.storage.ProtocolStatus
Gets the value of the 'lastModified' field.
getLastModified() - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Gets the value of the 'lastModified' field.
getLength() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputSplit
 
getLocations() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputSplit
 
getLong(String, long) - Method in class org.apache.nutch.storage.Host
 
getMajorCode() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Gets the value of the 'majorCode' field
getMajorCode() - Method in class org.apache.nutch.storage.ParseStatus
Gets the value of the 'majorCode' field.
getMajorCode() - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Gets the value of the 'majorCode' field.
getMarkers() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'markers' field
getMarkers() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'markers' field.
getMarkers() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'markers' field.
getMaxContent() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getMessage() - Method in class org.apache.nutch.api.model.response.ErrorResponse
 
getMessage(ParseStatus) - Static method in class org.apache.nutch.parse.ParseStatusUtils
A convenience method.
getMessage(ProtocolStatus) - Static method in class org.apache.nutch.protocol.ProtocolStatusUtils
 
getMeta(String) - Method in class org.apache.nutch.metadata.MetaWrapper
Get metadata.
getMeta(String) - Method in class org.apache.nutch.scoring.ScoreDatum
 
getMetadata() - Method in class org.apache.nutch.metadata.MetaWrapper
Get all metadata.
getMetadata() - Method in class org.apache.nutch.protocol.Content
Other protocol-specific data.
getMetadata() - Method in class org.apache.nutch.storage.Host.Builder
Gets the value of the 'metadata' field
getMetadata() - Method in class org.apache.nutch.storage.Host
Gets the value of the 'metadata' field.
getMetadata() - Method in class org.apache.nutch.storage.Host.Tombstone
Gets the value of the 'metadata' field.
getMetadata() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'metadata' field
getMetadata() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'metadata' field.
getMetadata() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'metadata' field.
getMetaTags(HTMLMetaTags, Node, URL) - Static method in class org.apache.nutch.parse.html.HTMLMetaProcessor
Sets the indicators in robotsMeta to appropriate values, based on any META tags found under the given node.
getMetaTags(HTMLMetaTags, Node, URL) - Static method in class org.apache.nutch.parse.tika.HTMLMetaProcessor
Sets the indicators in robotsMeta to appropriate values, based on any META tags found under the given node.
getMetaValues(String) - Method in class org.apache.nutch.metadata.MetaWrapper
Get multiple metadata.
getMimeType(String) - Method in class org.apache.nutch.util.MimeUtil
Facade interface to Tika's underlying MimeTypes.getMimeType(String) method.
getMimeType(File) - Method in class org.apache.nutch.util.MimeUtil
Facade interface to Tika's underlying MimeTypes.getMimeType(File) method.
getMinorCode() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Gets the value of the 'minorCode' field
getMinorCode() - Method in class org.apache.nutch.storage.ParseStatus
Gets the value of the 'minorCode' field.
getMinorCode() - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Gets the value of the 'minorCode' field.
getModifiedTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'modifiedTime' field
getModifiedTime() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'modifiedTime' field.
getModifiedTime() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'modifiedTime' field.
getMsg() - Method in class org.apache.nutch.api.model.response.JobInfo
 
getMsg() - Method in class org.apache.nutch.webui.client.model.JobInfo
 
getName() - Method in class org.apache.nutch.api.model.request.SeedList
 
getName() - Method in class org.apache.nutch.collection.Subcollection
 
getName(byte) - Static method in class org.apache.nutch.crawl.CrawlStatus
 
getName() - Method in class org.apache.nutch.plugin.ExtensionPoint
Returns the name of the extension point.
getName() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns the name of the plugin.
getName(int) - Static method in class org.apache.nutch.protocol.ProtocolStatusUtils
 
getName() - Method in enum org.apache.nutch.storage.Host.Field
Gets field's name.
getName() - Method in enum org.apache.nutch.storage.Mark
 
getName() - Method in enum org.apache.nutch.storage.ParseStatus.Field
Gets field's name.
getName() - Method in enum org.apache.nutch.storage.ProtocolStatus.Field
Gets field's name.
getName() - Method in enum org.apache.nutch.storage.WebPage.Field
Gets field's name.
getName() - Method in class org.apache.nutch.webui.model.NutchConfig
 
getName() - Method in class org.apache.nutch.webui.model.NutchInstance
 
getName() - Method in class org.apache.nutch.webui.model.SeedList
 
getNoCache() - Method in class org.apache.nutch.parse.HTMLMetaTags
A convenience method.
getNoFollow() - Method in class org.apache.nutch.parse.HTMLMetaTags
A convenience method.
getNoIndex() - Method in class org.apache.nutch.parse.HTMLMetaTags
A convenience method.
getNormalizedName(String) - Static method in class org.apache.nutch.metadata.SpellCheckedMetadata
Get the normalized name of metadata attribute name.
getNotExportedLibUrls() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns an array of libraries as URLs that are not exported by the plugin.
getNumberOfRounds() - Method in class org.apache.nutch.webui.client.model.Crawl
 
getNutchConfig(String) - Method in class org.apache.nutch.webui.client.impl.NutchClientImpl
 
getNutchConfig(String) - Method in interface org.apache.nutch.webui.client.NutchClient
 
getNutchConfig(Long) - Method in class org.apache.nutch.webui.service.impl.NutchServiceImpl
 
getNutchConfig(Long) - Method in interface org.apache.nutch.webui.service.NutchService
 
getNutchInstance() - Method in class org.apache.nutch.webui.client.impl.NutchClientImpl
 
getNutchInstance() - Method in interface org.apache.nutch.webui.client.NutchClient
 
getNutchStatus() - Method in class org.apache.nutch.api.resources.AdminResource
 
getNutchStatus() - Method in class org.apache.nutch.webui.client.impl.NutchClientImpl
 
getNutchStatus() - Method in interface org.apache.nutch.webui.client.NutchClient
 
getNutchStatus(Long) - Method in class org.apache.nutch.webui.service.impl.NutchServiceImpl
 
getNutchStatus(Long) - Method in interface org.apache.nutch.webui.service.NutchService
 
getObject(String) - Method in class org.apache.nutch.util.ObjectCache
 
getOutlinks(URL, ArrayList<Outlink>, Node) - Method in class org.apache.nutch.parse.html.DOMContentUtils
This method finds all anchors below the supplied DOM node, and creates appropriate Outlink records for each (relative to the supplied base URL), and adds them to the outlinks ArrayList.
getOutlinks(String, Configuration) - Static method in class org.apache.nutch.parse.OutlinkExtractor
Extracts Outlink from given plain text.
getOutlinks(String, String, Configuration) - Static method in class org.apache.nutch.parse.OutlinkExtractor
Extracts Outlink from given plain text and adds anchor to the extracted Outlinks
getOutlinks() - Method in class org.apache.nutch.parse.Parse
 
getOutlinks(URL, ArrayList<Outlink>, Node) - Method in class org.apache.nutch.parse.tika.DOMContentUtils
This method finds all anchors below the supplied DOM node, and creates appropriate Outlink records for each (relative to the supplied base URL), and adds them to the outlinks ArrayList.
getOutlinks() - Method in class org.apache.nutch.storage.Host.Builder
Gets the value of the 'outlinks' field
getOutlinks() - Method in class org.apache.nutch.storage.Host
Gets the value of the 'outlinks' field.
getOutlinks() - Method in class org.apache.nutch.storage.Host.Tombstone
Gets the value of the 'outlinks' field.
getOutlinks() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'outlinks' field
getOutlinks() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'outlinks' field.
getOutlinks() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'outlinks' field.
getOutputCommitter(TaskAttemptContext) - Method in class org.apache.nutch.indexer.IndexerOutputFormat
 
getPage(String) - Static method in class org.apache.nutch.util.URLUtil
Returns the page for the url.
getParams() - Method in class org.apache.nutch.api.model.request.NutchConfig
 
getParse(String, WebPage) - Method in class org.apache.nutch.parse.html.HtmlParser
 
getParse(String, WebPage) - Method in class org.apache.nutch.parse.js.JSParseFilter
Set the Configuration object
getParse(String, WebPage) - Method in interface org.apache.nutch.parse.Parser
This method parses content in WebPage instance
getParse(String, WebPage) - Method in class org.apache.nutch.parse.tika.TikaParser
 
getParserById(String) - Method in class org.apache.nutch.parse.ParserFactory
Function returns a Parser instance with the specified extId, representing its extension ID.
getParsers(String, String) - Method in class org.apache.nutch.parse.ParserFactory
Function returns an array of Parsers for a given content type.
getParseStatus() - Method in class org.apache.nutch.parse.Parse
 
getParseStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'parseStatus' field
getParseStatus() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'parseStatus' field.
getParseStatus() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'parseStatus' field.
getPartition(IntWritable, FetchEntry, int) - Method in class org.apache.nutch.crawl.URLPartitioner.FetchEntryPartitioner
 
getPartition(String, int) - Method in class org.apache.nutch.crawl.URLPartitioner
 
getPartition(GeneratorJob.SelectorEntry, WebPage, int) - Method in class org.apache.nutch.crawl.URLPartitioner.SelectorEntryPartitioner
 
getPartition(UrlWithScore, NutchWritable, int) - Method in class org.apache.nutch.crawl.UrlWithScore.UrlOnlyPartitioner
 
getPassAllFilter() - Static method in class org.apache.nutch.util.HadoopFSUtil
Returns PathFilter that passes all paths through.
getPassDirectoriesFilter(FileSystem) - Static method in class org.apache.nutch.util.HadoopFSUtil
Returns PathFilter that passes directories through.
getPassword() - Method in class org.apache.nutch.webui.model.NutchInstance
 
getPaths(FileStatus[]) - Static method in class org.apache.nutch.util.HadoopFSUtil
Turns an array of FileStatus into an array of Paths.
getPluginClass() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns the fully qualified name of the class which implements the abstract Plugin class.
getPluginDescriptor(String) - Method in class org.apache.nutch.plugin.PluginRepository
Returns the descriptor of one plugin identified by a plugin id.
getPluginDescriptors() - Method in class org.apache.nutch.plugin.PluginRepository
Returns all registered plugin descriptors.
getPluginFolder(String) - Method in class org.apache.nutch.plugin.PluginManifestParser
Return the named plugin folder.
getPluginId() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns the unique identifier of the plug-in or null.
getPluginInstance(PluginDescriptor) - Method in class org.apache.nutch.plugin.PluginRepository
Returns an instance of a plugin.
getPluginList(String) - Method in class org.apache.nutch.parse.ParsePluginList
 
getPluginPath() - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns the directory path of the plugin.
getPort() - Method in class org.apache.nutch.webui.model.NutchInstance
 
getPos() - Method in class org.apache.nutch.tools.arc.ArcRecordReader
Returns the current position in the file.
getPrevFetchTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'prevFetchTime' field
getPrevFetchTime() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'prevFetchTime' field.
getPrevFetchTime() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'prevFetchTime' field.
getPrevModifiedTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'prevModifiedTime' field
getPrevModifiedTime() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'prevModifiedTime' field.
getPrevModifiedTime() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'prevModifiedTime' field.
getPrevSignature() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'prevSignature' field
getPrevSignature() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'prevSignature' field.
getPrevSignature() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'prevSignature' field.
getProgress() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecordReader
 
getProgress() - Method in class org.apache.nutch.tools.arc.ArcRecordReader
Returns the percentage of progress in processing the file.
getProgress() - Method in class org.apache.nutch.util.NutchTool
Returns relative progress of the tool, a float in range [0,1].
getProgress() - Method in class org.apache.nutch.webui.client.model.Crawl
 
getProperty(String, String) - Method in class org.apache.nutch.api.resources.ConfigResource
 
getProtocol(String) - Method in class org.apache.nutch.protocol.ProtocolFactory
Returns the appropriate Protocol implementation for a url.
getProtocolOutput(String, WebPage) - Method in class org.apache.nutch.protocol.file.File
Creates a FileResponse object corresponding to the url and returns a ProtocolOutput object as per the content received
getProtocolOutput(String, WebPage) - Method in class org.apache.nutch.protocol.ftp.Ftp
Creates a FtpResponse object corresponding to the url and returns a ProtocolOutput object as per the content received
getProtocolOutput(String, WebPage) - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getProtocolOutput(String, WebPage) - Method in interface org.apache.nutch.protocol.Protocol
 
getProtocolOutput(String, WebPage) - Method in class org.apache.nutch.protocol.sftp.Sftp
 
getProtocolStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'protocolStatus' field
getProtocolStatus() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'protocolStatus' field.
getProtocolStatus() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'protocolStatus' field.
getProviderName() - Method in class org.apache.nutch.plugin.PluginDescriptor
 
getProxyHost() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getProxyPort() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getRealm() - Method in interface org.apache.nutch.protocol.httpclient.HttpAuthentication
Gets the realm used by the HttpAuthentication object during creation.
getRealm() - Method in class org.apache.nutch.protocol.httpclient.HttpBasicAuthentication
Gets the realm attribute of the HttpBasicAuthentication object.
getRecordReader(InputSplit, JobConf, Reporter) - Method in class org.apache.nutch.tools.arc.ArcInputFormat
Returns the RecordReader for reading the arc file.
getRecordWriter(TaskAttemptContext) - Method in class org.apache.nutch.indexer.IndexerOutputFormat
 
getRefresh() - Method in class org.apache.nutch.parse.HTMLMetaTags
A convenience method.
getRefreshHref() - Method in class org.apache.nutch.parse.HTMLMetaTags
A convenience method.
getRefreshTime() - Method in class org.apache.nutch.parse.HTMLMetaTags
A convenience method.
getRepresentation(Status, Request, Response) - Method in class org.apache.nutch.api.misc.ErrorStatusService
 
getReprUrl() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'reprUrl' field
getReprUrl() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'reprUrl' field.
getReprUrl() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'reprUrl' field.
getResourceString(String, Locale) - Method in class org.apache.nutch.plugin.PluginDescriptor
Returns an I18N'd resource string.
getResponse(URL, WebPage, boolean) - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getResponse(URL, WebPage, boolean) - Method in class org.apache.nutch.protocol.http.Http
 
getResponse(URL, WebPage, boolean) - Method in class org.apache.nutch.protocol.httpclient.Http
Fetches the url with a configured HTTP client and gets the response.
getResult() - Method in class org.apache.nutch.api.model.response.JobInfo
 
getResult() - Method in class org.apache.nutch.webui.client.model.JobInfo
 
getRetriesSinceFetch() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'retriesSinceFetch' field
getRetriesSinceFetch() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'retriesSinceFetch' field.
getRetriesSinceFetch() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'retriesSinceFetch' field.
getReversedHost(String) - Static method in class org.apache.nutch.util.TableUtil
Given a reversed url, returns the reversed host, e.g. "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar"
getRobotRules(String, WebPage) - Method in class org.apache.nutch.protocol.file.File
No robots parsing is done for file protocol.
getRobotRules(String, WebPage) - Method in class org.apache.nutch.protocol.ftp.Ftp
Get the robots rules for a given url
getRobotRules(String, WebPage) - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getRobotRules(String, WebPage) - Method in interface org.apache.nutch.protocol.Protocol
Retrieve robot rules applicable for this url.
getRobotRules(String, WebPage) - Method in class org.apache.nutch.protocol.sftp.Sftp
 
getRobotRulesSet(Protocol, URL) - Method in class org.apache.nutch.protocol.ftp.FtpRobotRulesParser
For hosts whose robots rules have not been cached yet, it sends an FTP request to the host corresponding to the URL passed, gets the robots file, parses the rules and caches the rules object to avoid re-work in future.
getRobotRulesSet(Protocol, URL) - Method in class org.apache.nutch.protocol.http.api.HttpRobotRulesParser
Get the rules from robots.txt which apply to the given url.
getRobotRulesSet(Protocol, String) - Method in class org.apache.nutch.protocol.RobotRulesParser
 
getRobotRulesSet(Protocol, URL) - Method in class org.apache.nutch.protocol.RobotRulesParser
 
getRootNode() - Method in class org.apache.nutch.parse.html.DOMBuilder
Get the root node of the DOM being created.
getRulesReader(Configuration) - Method in class org.apache.nutch.urlfilter.api.RegexURLFilterBase
Returns the name of the file of rules to use for a particular implementation.
getRulesReader(Configuration) - Method in class org.apache.nutch.urlfilter.automaton.AutomatonURLFilter
Rules specified as a config property will override rules specified as a config file.
getRulesReader(Configuration) - Method in class org.apache.nutch.urlfilter.regex.RegexURLFilter
Rules specified as a config property will override rules specified as a config file.
getRunningJobs() - Method in class org.apache.nutch.api.model.response.NutchStatus
 
getRunningJobs() - Method in class org.apache.nutch.webui.client.model.NutchStatus
 
getRuns() - Method in class org.apache.nutch.tools.Benchmark.BenchmarkResults
 
getSchema() - Method in class org.apache.nutch.plugin.ExtensionPoint
Returns a path to the XML schema of an extension point.
getSchema() - Method in class org.apache.nutch.storage.Host
 
getSchema() - Method in class org.apache.nutch.storage.ParseStatus
 
getSchema() - Method in class org.apache.nutch.storage.ProtocolStatus
 
getSchema() - Method in class org.apache.nutch.storage.WebPage
 
getScopedRules() - Method in class org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer
 
getScore() - Method in class org.apache.nutch.crawl.UrlWithScore
 
getScore() - Method in class org.apache.nutch.indexer.NutchDocument
 
getScore() - Method in class org.apache.nutch.scoring.ScoreDatum
 
getScore() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'score' field
getScore() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'score' field.
getScore() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'score' field.
getSecond() - Method in class org.apache.nutch.util.Pair
 
getSeedDirectory() - Method in class org.apache.nutch.webui.client.model.Crawl
 
getSeedList() - Method in class org.apache.nutch.api.model.request.SeedUrl
 
getSeedList() - Method in class org.apache.nutch.webui.client.model.Crawl
 
getSeedList() - Method in class org.apache.nutch.webui.model.SeedUrl
 
getSeedList(Long) - Method in class org.apache.nutch.webui.service.impl.SeedListServiceImpl
 
getSeedList(Long) - Method in interface org.apache.nutch.webui.service.SeedListService
 
getSeedUrls() - Method in class org.apache.nutch.api.model.request.SeedList
 
getSeedUrls() - Method in class org.apache.nutch.webui.model.SeedList
 
getSeedUrlsCount() - Method in class org.apache.nutch.webui.model.SeedList
 
getSignature(Configuration) - Static method in class org.apache.nutch.crawl.SignatureFactory
Return the default Signature implementation.
getSignature() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'signature' field
getSignature() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'signature' field.
getSignature() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'signature' field.
getSplits(JobContext) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputFormat
 
getStackTrace() - Method in class org.apache.nutch.api.model.response.ErrorResponse
 
getStages() - Method in class org.apache.nutch.tools.Benchmark.BenchmarkResults
 
getStartDate() - Method in class org.apache.nutch.api.model.response.NutchStatus
 
getStartDate() - Method in class org.apache.nutch.webui.client.model.NutchStatus
 
getStarted() - Method in class org.apache.nutch.api.NutchServer
 
getStartKey() - Method in class org.apache.nutch.api.model.request.DbFilter
 
getState() - Method in class org.apache.nutch.api.model.response.JobInfo
 
getState() - Method in class org.apache.nutch.webui.client.model.JobInfo
 
getStatus(Throwable, Request, Response) - Method in class org.apache.nutch.api.misc.ErrorStatusService
 
getStatus() - Method in class org.apache.nutch.protocol.ProtocolOutput
 
getStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'status' field
getStatus() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'status' field.
getStatus() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'status' field.
getStatus() - Method in class org.apache.nutch.util.domain.DomainSuffix
 
getStatus() - Method in class org.apache.nutch.util.NutchTool
Returns current status of the running tool.
getStatus() - Method in class org.apache.nutch.webui.client.model.Crawl
 
getSubColection(String) - Method in class org.apache.nutch.collection.CollectionManager
Returns named subcollection
getSubCollections(String) - Method in class org.apache.nutch.collection.CollectionManager
Return names of collections url is part of
getSystemName() - Method in class org.apache.nutch.protocol.ftp.Client
Fetches the system type name from the server and returns the string.
getTargetPoint() - Method in class org.apache.nutch.plugin.Extension
Returns the Id of the extension point, that is implemented by this extension.
getText(StringBuilder, Node, boolean) - Method in class org.apache.nutch.parse.html.DOMContentUtils
This method takes a StringBuilder and a DOM Node, and will append all the content text found beneath the DOM node to the StringBuilder.
getText(StringBuilder, Node) - Method in class org.apache.nutch.parse.html.DOMContentUtils
This is a convenience method, equivalent to getText(sb, node, false).
getText() - Method in class org.apache.nutch.parse.Parse
 
getText(StringBuffer, Node) - Method in class org.apache.nutch.parse.tika.DOMContentUtils
This is a convenience method, equivalent to getText(sb, node, false).
getText() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'text' field
getText() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'text' field.
getText() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'text' field.
getThrownError() - Method in class org.apache.nutch.util.CommandRunner
 
getTikaConfig() - Method in class org.apache.nutch.parse.tika.TikaParser
 
getTimeout() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getTimeout() - Method in class org.apache.nutch.util.CommandRunner
 
getTimeout() - Method in class org.apache.nutch.webui.client.impl.RemoteCommand
 
getTitle(StringBuilder, Node) - Method in class org.apache.nutch.parse.html.DOMContentUtils
This method takes a StringBuilder and a DOM Node, and will append the content text found beneath the first title node to the StringBuilder.
getTitle() - Method in class org.apache.nutch.parse.Parse
 
getTitle(StringBuffer, Node) - Method in class org.apache.nutch.parse.tika.DOMContentUtils
This method takes a StringBuffer and a DOM Node, and will append the content text found beneath the first title node to the StringBuffer.
getTitle() - Method in class org.apache.nutch.storage.WebPage.Builder
Gets the value of the 'title' field
getTitle() - Method in class org.apache.nutch.storage.WebPage
Gets the value of the 'title' field.
getTitle() - Method in class org.apache.nutch.storage.WebPage.Tombstone
Gets the value of the 'title' field.
getTlsPreferredCipherSuites() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getTlsPreferredProtocols() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getTombstone() - Method in class org.apache.nutch.storage.Host
 
getTombstone() - Method in class org.apache.nutch.storage.ParseStatus
 
getTombstone() - Method in class org.apache.nutch.storage.ProtocolStatus
 
getTombstone() - Method in class org.apache.nutch.storage.WebPage
 
getToUrl() - Method in class org.apache.nutch.parse.Outlink
 
getTstamp() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord
 
getType() - Method in class org.apache.nutch.api.model.request.JobConfig
 
getType() - Method in class org.apache.nutch.api.model.response.JobInfo
 
getType() - Method in class org.apache.nutch.util.domain.TopLevelDomain
 
getType() - Method in class org.apache.nutch.webui.client.model.JobConfig
 
getType() - Method in class org.apache.nutch.webui.client.model.JobInfo
 
getTypes() - Method in class org.apache.nutch.crawl.NutchWritable
 
getUniqueKey() - Method in class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
getUrl() - Method in class org.apache.nutch.api.model.request.SeedUrl
 
getUrl() - Method in class org.apache.nutch.crawl.URLWebPage
 
getUrl() - Method in class org.apache.nutch.crawl.UrlWithScore
 
getUrl() - Method in interface org.apache.nutch.net.protocols.Response
Returns the URL used to retrieve this response.
getUrl() - Method in exception org.apache.nutch.parse.ParserNotFound
 
getUrl() - Method in class org.apache.nutch.protocol.Content
The url fetched.
getUrl() - Method in class org.apache.nutch.protocol.http.HttpResponse
 
getUrl() - Method in class org.apache.nutch.protocol.httpclient.HttpResponse
 
getUrl() - Method in exception org.apache.nutch.protocol.ProtocolNotFound
 
getUrl() - Method in class org.apache.nutch.scoring.ScoreDatum
 
getUrl() - Method in class org.apache.nutch.webui.model.NutchInstance
 
getUrl() - Method in class org.apache.nutch.webui.model.SeedUrl
 
getUseHttp11() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getUserAgent() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
getUsername() - Method in class org.apache.nutch.webui.model.NutchInstance
 
getUUID(Configuration) - Static method in class org.apache.nutch.util.NutchConfiguration
Retrieve a Nutch UUID of this configuration object, or null if the configuration was created elsewhere.
getValue(String, String) - Method in class org.apache.nutch.storage.Host
 
getValue(E) - Method in class org.apache.nutch.util.Histogram
 
getValue() - Method in class org.apache.nutch.webui.model.NutchConfig
 
getValues() - Method in class org.apache.nutch.api.model.response.DbQueryResult
 
getValues(String) - Method in class org.apache.nutch.metadata.Metadata
Get the values associated to a metadata name.
getValues(String) - Method in class org.apache.nutch.metadata.SpellCheckedMetadata
 
getVersion() - Method in class org.apache.nutch.plugin.PluginDescriptor
 
getWaitForExit() - Method in class org.apache.nutch.util.CommandRunner
 
getWebPage() - Method in class org.apache.nutch.fetcher.FetchEntry
 
getWebPage() - Method in class org.apache.nutch.util.WebPageWritable
 
getWhiteList() - Method in class org.apache.nutch.collection.Subcollection
Returns whitelist
getWhiteListString() - Method in class org.apache.nutch.collection.Subcollection
Returns whitelist String
getWriter() - Method in class org.apache.nutch.parse.html.DOMBuilder
Return null since there is no Writer for this class.
GONE - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Resource is gone.
guessEncoding(WebPage, String) - Method in class org.apache.nutch.util.EncodingDetector
Guess the encoding with the previously specified list of clues.
GZIPUtils - Class in org.apache.nutch.util
A collection of utility methods for working on GZIPed data.
GZIPUtils() - Constructor for class org.apache.nutch.util.GZIPUtils
 

H

HadoopFSUtil - Class in org.apache.nutch.util
 
HadoopFSUtil() - Constructor for class org.apache.nutch.util.HadoopFSUtil
 
handle(String, HttpServletRequest, HttpServletResponse, int) - Method in class org.apache.nutch.tools.proxy.AbstractTestbedHandler
 
handle(Request, HttpServletResponse, String, int) - Method in class org.apache.nutch.tools.proxy.AbstractTestbedHandler
 
handle(Request, HttpServletResponse, String, int) - Method in class org.apache.nutch.tools.proxy.DelayHandler
 
handle(Request, HttpServletResponse, String, int) - Method in class org.apache.nutch.tools.proxy.FakeHandler
 
handle(Request, HttpServletResponse, String, int) - Method in class org.apache.nutch.tools.proxy.LogDebugHandler
 
handle(Request, HttpServletResponse, String, int) - Method in class org.apache.nutch.tools.proxy.NotFoundHandler
 
hasArgs() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Checks whether the 'args' field has been set
hasArgs() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Checks whether the 'args' field has been set
hasBaseUrl() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'baseUrl' field has been set
hasBatchId() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'batchId' field has been set
hasCode() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Checks whether the 'code' field has been set
hasContent() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'content' field has been set
hasContentType() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'contentType' field has been set
hasCopy(String) - Method in class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
hasFetchInterval() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'fetchInterval' field has been set
hasFetchTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'fetchTime' field has been set
hashCode() - Method in class org.apache.nutch.api.model.request.SeedList
 
hashCode() - Method in class org.apache.nutch.api.model.request.SeedUrl
 
hashCode() - Method in class org.apache.nutch.crawl.GeneratorJob.SelectorEntry
 
hashCode() - Method in class org.apache.nutch.plugin.PluginClassLoader
 
hashCode() - Method in class org.apache.nutch.protocol.httpclient.DummySSLProtocolSocketFactory
 
hashCode(byte[]) - Static method in class org.apache.nutch.util.Bytes
 
hashCode(byte[], int) - Static method in class org.apache.nutch.util.Bytes
 
hashCode() - Method in class org.apache.nutch.webui.model.SeedList
 
hashCode() - Method in class org.apache.nutch.webui.model.SeedUrl
 
hasHeaders() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'headers' field has been set
hasInlinks() - Method in class org.apache.nutch.storage.Host.Builder
Checks whether the 'inlinks' field has been set
hasInlinks() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'inlinks' field has been set
hasLastModified() - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Checks whether the 'lastModified' field has been set
hasMajorCode() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Checks whether the 'majorCode' field has been set
hasMarkers() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'markers' field has been set
hasMetadata() - Method in class org.apache.nutch.storage.Host.Builder
Checks whether the 'metadata' field has been set
hasMetadata() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'metadata' field has been set
hasMinorCode() - Method in class org.apache.nutch.storage.ParseStatus.Builder
Checks whether the 'minorCode' field has been set
hasModifiedTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'modifiedTime' field has been set
hasNext() - Method in class org.apache.nutch.api.impl.db.DbIterator
 
hasNext() - Method in class org.apache.nutch.util.NodeWalker
Returns true if there are more nodes on the current stack.
hasOutlinks() - Method in class org.apache.nutch.storage.Host.Builder
Checks whether the 'outlinks' field has been set
hasOutlinks() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'outlinks' field has been set
hasParseStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'parseStatus' field has been set
hasPrevFetchTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'prevFetchTime' field has been set
hasPrevModifiedTime() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'prevModifiedTime' field has been set
hasPrevSignature() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'prevSignature' field has been set
hasProtocolStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'protocolStatus' field has been set
hasReprUrl() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'reprUrl' field has been set
hasRetriesSinceFetch() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'retriesSinceFetch' field has been set
hasScore() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'score' field has been set
hasSignature() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'signature' field has been set
hasStatus() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'status' field has been set
hasText() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'text' field has been set
hasTitle() - Method in class org.apache.nutch.storage.WebPage.Builder
Checks whether the 'title' field has been set
head(byte[], int) - Static method in class org.apache.nutch.util.Bytes
 
Histogram<E> - Class in org.apache.nutch.util
 
Histogram() - Constructor for class org.apache.nutch.util.Histogram
 
HOST - Static variable in interface org.apache.nutch.indexwriter.elastic.ElasticConstants
 
Host - Class in org.apache.nutch.storage
 
Host() - Constructor for class org.apache.nutch.storage.Host
 
Host.Builder - Class in org.apache.nutch.storage
RecordBuilder for Host instances.
Host.Field - Enum in org.apache.nutch.storage
Enum containing all data bean's fields.
Host.Tombstone - Class in org.apache.nutch.storage
 
HostDb - Class in org.apache.nutch.host
A caching wrapper for the host datastore.
HostDb(Configuration) - Constructor for class org.apache.nutch.host.HostDb
 
HOSTDB_CONCURRENCY_LEVEL - Static variable in class org.apache.nutch.host.HostDb
 
HOSTDB_LRU_SIZE - Static variable in class org.apache.nutch.host.HostDb
 
HostDbReader - Class in org.apache.nutch.host
Display entries from the hostDB.
HostDbReader() - Constructor for class org.apache.nutch.host.HostDbReader
 
HostDbUpdateJob - Class in org.apache.nutch.host
Scans the web table and creates host entries for each unique host.
HostDbUpdateJob() - Constructor for class org.apache.nutch.host.HostDbUpdateJob
 
HostDbUpdateJob(Configuration) - Constructor for class org.apache.nutch.host.HostDbUpdateJob
 
HostDbUpdateJob.Mapper - Class in org.apache.nutch.host
Maps each WebPage to a host key.
HostDbUpdateJob.Mapper() - Constructor for class org.apache.nutch.host.HostDbUpdateJob.Mapper
 
HostDbUpdateReducer - Class in org.apache.nutch.host
Combines all WebPages with the same host key to create a Host object, with some statistics.
HostDbUpdateReducer() - Constructor for class org.apache.nutch.host.HostDbUpdateReducer
 
HostInjectorJob - Class in org.apache.nutch.host
Creates or updates an existing host table from a text file.
The files contain one host name per line, optionally followed by custom metadata separated by tabs, with each metadata key separated from its corresponding value by '='.
HostInjectorJob() - Constructor for class org.apache.nutch.host.HostInjectorJob
 
HostInjectorJob(Configuration) - Constructor for class org.apache.nutch.host.HostInjectorJob
 
HostInjectorJob.UrlMapper - Class in org.apache.nutch.host
 
HostInjectorJob.UrlMapper() - Constructor for class org.apache.nutch.host.HostInjectorJob.UrlMapper
 
HtmlIndexingFilter - Class in org.apache.nutch.indexer.html
Add raw HTML content of a document to the index.
HtmlIndexingFilter() - Constructor for class org.apache.nutch.indexer.html.HtmlIndexingFilter
 
HTMLLanguageParser - Class in org.apache.nutch.analysis.lang
Adds metadata identifying the language of the document, if found. We could also run statistical analysis here, but we'd miss all other formats.
HTMLLanguageParser() - Constructor for class org.apache.nutch.analysis.lang.HTMLLanguageParser
 
HTMLMetaProcessor - Class in org.apache.nutch.parse.html
Class for parsing META Directives from DOM trees.
HTMLMetaProcessor() - Constructor for class org.apache.nutch.parse.html.HTMLMetaProcessor
 
HTMLMetaProcessor - Class in org.apache.nutch.parse.tika
Class for parsing META Directives from DOM trees.
HTMLMetaProcessor() - Constructor for class org.apache.nutch.parse.tika.HTMLMetaProcessor
 
HTMLMetaTags - Class in org.apache.nutch.parse
This class holds the information about HTML "meta" tags extracted from a page.
HTMLMetaTags() - Constructor for class org.apache.nutch.parse.HTMLMetaTags
 
HTMLPARSEFILTER_ORDER - Static variable in class org.apache.nutch.parse.ParseFilters
 
HtmlParser - Class in org.apache.nutch.parse.html
 
HtmlParser() - Constructor for class org.apache.nutch.parse.html.HtmlParser
 
Http - Class in org.apache.nutch.protocol.http
 
Http() - Constructor for class org.apache.nutch.protocol.http.Http
 
Http - Class in org.apache.nutch.protocol.httpclient
This class is a protocol plugin that configures an HTTP client for Basic, Digest and NTLM authentication schemes for web server as well as proxy server.
Http() - Constructor for class org.apache.nutch.protocol.httpclient.Http
Constructs this plugin.
HttpAuthentication - Interface in org.apache.nutch.protocol.httpclient
The base level of services required for Http Authentication
HttpAuthenticationException - Exception in org.apache.nutch.protocol.httpclient
Can be used to identify problems during creation of Authentication objects.
HttpAuthenticationException() - Constructor for exception org.apache.nutch.protocol.httpclient.HttpAuthenticationException
Constructs a new exception with null as its detail message.
HttpAuthenticationException(String) - Constructor for exception org.apache.nutch.protocol.httpclient.HttpAuthenticationException
Constructs a new exception with the specified detail message.
HttpAuthenticationException(String, Throwable) - Constructor for exception org.apache.nutch.protocol.httpclient.HttpAuthenticationException
Constructs a new exception with the specified message and cause.
HttpAuthenticationException(Throwable) - Constructor for exception org.apache.nutch.protocol.httpclient.HttpAuthenticationException
Constructs a new exception with the specified cause and the detail message from the given cause if it is not null.
HttpAuthenticationFactory - Class in org.apache.nutch.protocol.httpclient
Provides the Http protocol implementation with the ability to authenticate when prompted.
HttpAuthenticationFactory(Configuration) - Constructor for class org.apache.nutch.protocol.httpclient.HttpAuthenticationFactory
 
HttpBase - Class in org.apache.nutch.protocol.http.api
 
HttpBase() - Constructor for class org.apache.nutch.protocol.http.api.HttpBase
Creates a new instance of HttpBase
HttpBase(Logger) - Constructor for class org.apache.nutch.protocol.http.api.HttpBase
Creates a new instance of HttpBase
HttpBasicAuthentication - Class in org.apache.nutch.protocol.httpclient
Implementation of RFC 2617 Basic Authentication.
HttpBasicAuthentication(String, Configuration) - Constructor for class org.apache.nutch.protocol.httpclient.HttpBasicAuthentication
Construct an HttpBasicAuthentication for the given challenge parameters.
HttpDateFormat - Class in org.apache.nutch.net.protocols
Class to handle HTTP dates.
HttpDateFormat() - Constructor for class org.apache.nutch.net.protocols.HttpDateFormat
 
HttpException - Exception in org.apache.nutch.protocol.http.api
 
HttpException() - Constructor for exception org.apache.nutch.protocol.http.api.HttpException
 
HttpException(String) - Constructor for exception org.apache.nutch.protocol.http.api.HttpException
 
HttpException(String, Throwable) - Constructor for exception org.apache.nutch.protocol.http.api.HttpException
 
HttpException(Throwable) - Constructor for exception org.apache.nutch.protocol.http.api.HttpException
 
HttpHeaders - Interface in org.apache.nutch.metadata
A collection of HTTP header names.
HttpResponse - Class in org.apache.nutch.protocol.http
An HTTP response.
HttpResponse(HttpBase, URL, WebPage) - Constructor for class org.apache.nutch.protocol.http.HttpResponse
 
HttpResponse - Class in org.apache.nutch.protocol.httpclient
An HTTP response.
HttpResponse.Scheme - Enum in org.apache.nutch.protocol.http
 
HttpRobotRulesParser - Class in org.apache.nutch.protocol.http.api
This class is used for parsing robots for urls belonging to HTTP protocol.
HttpRobotRulesParser(Configuration) - Constructor for class org.apache.nutch.protocol.http.api.HttpRobotRulesParser
 

I

ID_FIELD - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
ID_FIELD - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
IDENTIFIER - Static variable in interface org.apache.nutch.metadata.DublinCore
Recommended best practice is to identify the resource by means of a string or number conforming to a formal identification system.
IdentityPageReducer - Class in org.apache.nutch.util
 
IdentityPageReducer() - Constructor for class org.apache.nutch.util.IdentityPageReducer
 
ignorableWhitespace(char[], int, int) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of ignorable whitespace in element content.
in - Variable in class org.apache.nutch.tools.arc.ArcRecordReader
 
incrementBytes(byte[], long) - Static method in class org.apache.nutch.util.Bytes
Bytewise binary increment/decrement of the long contained in a byte array by the given amount.
index(String) - Method in class org.apache.nutch.indexer.IndexingJob
 
index(String, WebPage) - Method in class org.apache.nutch.indexer.IndexUtil
Index a Webpage, here we add the following fields: id: default uniqueKey for the NutchDocument. digest: Digest is used to identify pages (like unique ID) and is used to remove duplicates during the dedup procedure.
INDEX - Static variable in interface org.apache.nutch.indexwriter.elastic.ElasticConstants
 
IndexCleaningFilter - Interface in org.apache.nutch.indexer
Extension point for indexing.
IndexCleaningFilter_ORDER - Static variable in class org.apache.nutch.indexer.IndexCleaningFilters
 
IndexCleaningFilters - Class in org.apache.nutch.indexer
Creates and caches IndexCleaningFilter implementing plugins.
IndexCleaningFilters(Configuration) - Constructor for class org.apache.nutch.indexer.IndexCleaningFilters
 
IndexerOutputFormat - Class in org.apache.nutch.indexer
 
IndexerOutputFormat() - Constructor for class org.apache.nutch.indexer.IndexerOutputFormat
 
indexerScore(String, NutchDocument, WebPage, float) - Method in class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
indexerScore(String, NutchDocument, WebPage, float) - Method in class org.apache.nutch.scoring.opic.OPICScoringFilter
Dampen the boost value by scorePower.
indexerScore(String, NutchDocument, WebPage, float) - Method in interface org.apache.nutch.scoring.ScoringFilter
This method calculates a Lucene document boost.
indexerScore(String, NutchDocument, WebPage, float) - Method in class org.apache.nutch.scoring.ScoringFilters
 
indexerScore(String, NutchDocument, WebPage, float) - Method in class org.apache.nutch.scoring.tld.TLDScoringFilter
 
IndexingException - Exception in org.apache.nutch.indexer
 
IndexingException() - Constructor for exception org.apache.nutch.indexer.IndexingException
 
IndexingException(String) - Constructor for exception org.apache.nutch.indexer.IndexingException
 
IndexingException(String, Throwable) - Constructor for exception org.apache.nutch.indexer.IndexingException
 
IndexingException(Throwable) - Constructor for exception org.apache.nutch.indexer.IndexingException
 
IndexingFilter - Interface in org.apache.nutch.indexer
Extension point for indexing.
INDEXINGFILTER_ORDER - Static variable in class org.apache.nutch.indexer.IndexingFilters
 
IndexingFilters - Class in org.apache.nutch.indexer
Creates and caches IndexingFilter implementing plugins.
IndexingFilters(Configuration) - Constructor for class org.apache.nutch.indexer.IndexingFilters
 
IndexingFiltersChecker - Class in org.apache.nutch.indexer
Reads and parses a URL and runs the indexers on it.
IndexingFiltersChecker() - Constructor for class org.apache.nutch.indexer.IndexingFiltersChecker
 
IndexingJob - Class in org.apache.nutch.indexer
 
IndexingJob() - Constructor for class org.apache.nutch.indexer.IndexingJob
 
IndexingJob.IndexerMapper - Class in org.apache.nutch.indexer
 
IndexingJob.IndexerMapper() - Constructor for class org.apache.nutch.indexer.IndexingJob.IndexerMapper
 
indexUtil - Variable in class org.apache.nutch.indexer.IndexingJob.IndexerMapper
 
IndexUtil - Class in org.apache.nutch.indexer
Utility to create an indexed document from a webpage.
IndexUtil(Configuration) - Constructor for class org.apache.nutch.indexer.IndexUtil
 
IndexWriter - Interface in org.apache.nutch.indexer
 
IndexWriters - Class in org.apache.nutch.indexer
Creates and caches IndexWriter implementing plugins.
IndexWriters(Configuration) - Constructor for class org.apache.nutch.indexer.IndexWriters
 
inflate(byte[]) - Static method in class org.apache.nutch.util.DeflateUtils
Returns an inflated copy of the input array.
inflateBestEffort(byte[]) - Static method in class org.apache.nutch.util.DeflateUtils
Returns an inflated copy of the input array.
inflateBestEffort(byte[], int) - Static method in class org.apache.nutch.util.DeflateUtils
Returns an inflated copy of the input array, truncated to sizeLimit bytes, if necessary.
init() - Method in class org.apache.nutch.collection.CollectionManager
 
init(FilterConfig) - Method in class org.apache.nutch.tools.proxy.LogDebugHandler
 
init() - Method in class org.apache.nutch.webui.NutchUiApplication
 
initialize(Element) - Method in class org.apache.nutch.collection.Subcollection
Initialize Subcollection from dom element
initialize(InputSplit, TaskAttemptContext) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecordReader
 
initializeSchedule(String, WebPage) - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
Initialize fetch schedule related data.
initializeSchedule(String, WebPage) - Method in interface org.apache.nutch.crawl.FetchSchedule
Initialize fetch schedule related data.
initialScore(String, WebPage) - Method in class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
initialScore(String, WebPage) - Method in class org.apache.nutch.scoring.opic.OPICScoringFilter
Set to 0.0f (unknown value) - inlink contributions will bring it to a correct level.
initialScore(String, WebPage) - Method in interface org.apache.nutch.scoring.ScoringFilter
Set an initial score for newly discovered pages.
initialScore(String, WebPage) - Method in class org.apache.nutch.scoring.ScoringFilters
Calculate a new initial score, used when adding newly discovered pages.
initialScore(String, WebPage) - Method in class org.apache.nutch.scoring.tld.TLDScoringFilter
 
initMapperJob(Job, Collection<WebPage.Field>, Class<K>, Class<V>, Class<? extends GoraMapper<String, WebPage, K, V>>) - Static method in class org.apache.nutch.storage.StorageUtils
 
initMapperJob(Job, Collection<WebPage.Field>, Class<K>, Class<V>, Class<? extends GoraMapper<String, WebPage, K, V>>, Class<? extends Partitioner<K, V>>) - Static method in class org.apache.nutch.storage.StorageUtils
 
initMapperJob(Job, Collection<WebPage.Field>, Class<K>, Class<V>, Class<? extends GoraMapper<String, WebPage, K, V>>, Class<? extends Partitioner<K, V>>, boolean) - Static method in class org.apache.nutch.storage.StorageUtils
 
initMapperJob(Job, Collection<WebPage.Field>, Class<K>, Class<V>, Class<? extends GoraMapper<String, WebPage, K, V>>, Class<? extends Partitioner<K, V>>, Filter<String, WebPage>, boolean) - Static method in class org.apache.nutch.storage.StorageUtils
 
initMapperJob(Job, Collection<WebPage.Field>, Class<K>, Class<V>, Class<? extends GoraMapper<String, WebPage, K, V>>, Filter<String, WebPage>) - Static method in class org.apache.nutch.storage.StorageUtils
 
initPage(IModel<SeedList>) - Method in class org.apache.nutch.webui.pages.seed.SeedPage
 
initReducerJob(Job, Class<? extends GoraReducer<K, V, String, WebPage>>) - Static method in class org.apache.nutch.storage.StorageUtils
 
inject(Path) - Method in class org.apache.nutch.crawl.InjectorJob
 
inject(Path) - Method in class org.apache.nutch.host.HostInjectorJob
 
injectedScore(String, WebPage) - Method in class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
injectedScore(String, WebPage) - Method in class org.apache.nutch.scoring.opic.OPICScoringFilter
 
injectedScore(String, WebPage) - Method in interface org.apache.nutch.scoring.ScoringFilter
Set an initial score for newly injected pages.
injectedScore(String, WebPage) - Method in class org.apache.nutch.scoring.ScoringFilters
Calculate a new initial score, used when injecting new pages.
injectedScore(String, WebPage) - Method in class org.apache.nutch.scoring.tld.TLDScoringFilter
 
InjectorJob - Class in org.apache.nutch.crawl
This class takes a flat file of URLs and adds them to the set of pages to be crawled.
InjectorJob() - Constructor for class org.apache.nutch.crawl.InjectorJob
 
InjectorJob(Configuration) - Constructor for class org.apache.nutch.crawl.InjectorJob
 
InjectorJob.UrlMapper - Class in org.apache.nutch.crawl
 
InjectorJob.UrlMapper() - Constructor for class org.apache.nutch.crawl.InjectorJob.UrlMapper
 
instance(JobInfo.JobType) - Static method in class org.apache.nutch.webui.client.impl.RemoteCommandBuilder
 
instance() - Static method in class org.apache.nutch.webui.pages.assets.NutchUiCssReference
 
InstancePanel - Class in org.apache.nutch.webui.pages.instances
 
InstancePanel(String) - Constructor for class org.apache.nutch.webui.pages.instances.InstancePanel
 
InstancesPage - Class in org.apache.nutch.webui.pages.instances
 
InstancesPage() - Constructor for class org.apache.nutch.webui.pages.instances.InstancesPage
 
isAllowed(URL) - Method in interface org.apache.nutch.protocol.RobotRules
Returns false if the robots.txt file prohibits us from accessing the given url, or true otherwise.
isArgsDirty(List<CharSequence>) - Method in class org.apache.nutch.storage.ParseStatus
Checks the dirty status of the 'args' field.
isArgsDirty(List<CharSequence>) - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Checks the dirty status of the 'args' field.
isArgsDirty(List<CharSequence>) - Method in class org.apache.nutch.storage.ProtocolStatus
Checks the dirty status of the 'args' field.
isArgsDirty(List<CharSequence>) - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Checks the dirty status of the 'args' field.
isBaseUrlDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'baseUrl' field.
isBaseUrlDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'baseUrl' field.
isBatchIdDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'batchId' field.
isBatchIdDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'batchId' field.
isClientTrusted(X509Certificate[]) - Method in class org.apache.nutch.protocol.httpclient.DummyX509TrustManager
 
isCodeDirty(Integer) - Method in class org.apache.nutch.storage.ProtocolStatus
Checks the dirty status of the 'code' field.
isCodeDirty(Integer) - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Checks the dirty status of the 'code' field.
isContentDirty(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'content' field.
isContentDirty(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'content' field.
isContentTypeDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'contentType' field.
isContentTypeDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'contentType' field.
isDomainSuffix(String) - Method in class org.apache.nutch.util.domain.DomainSuffixes
Returns whether the extension is a registered domain entry.
isEmpty(String) - Static method in class org.apache.nutch.util.StringUtil
Checks if a string is empty (i.e. is null or empty).
isFetchIntervalDirty(Integer) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'fetchInterval' field.
isFetchIntervalDirty(Integer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'fetchInterval' field.
isFetchTimeDirty(Long) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'fetchTime' field.
isFetchTimeDirty(Long) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'fetchTime' field.
isForce() - Method in class org.apache.nutch.api.model.request.NutchConfig
 
isHeadersDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'headers' field.
isHeadersDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'headers' field.
isIgnoreCase() - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
isInlinksDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host
Checks the dirty status of the 'inlinks' field.
isInlinksDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host.Tombstone
Checks the dirty status of the 'inlinks' field.
isInlinksDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'inlinks' field.
isInlinksDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'inlinks' field.
isKeysReversed() - Method in class org.apache.nutch.api.model.request.DbFilter
 
isLastModifiedDirty(Long) - Method in class org.apache.nutch.storage.ProtocolStatus
Checks the dirty status of the 'lastModified' field.
isLastModifiedDirty(Long) - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Checks the dirty status of the 'lastModified' field.
isMagic(byte[]) - Static method in class org.apache.nutch.tools.arc.ArcRecordReader
Returns true if the byte array passed matches the gzip header magic number.
isMajorCodeDirty(Integer) - Method in class org.apache.nutch.storage.ParseStatus
Checks the dirty status of the 'majorCode' field.
isMajorCodeDirty(Integer) - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Checks the dirty status of the 'majorCode' field.
isMarkersDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'markers' field.
isMarkersDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'markers' field.
isMetadataDirty(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.Host
Checks the dirty status of the 'metadata' field.
isMetadataDirty(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.Host.Tombstone
Checks the dirty status of the 'metadata' field.
isMetadataDirty(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'metadata' field.
isMetadataDirty(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'metadata' field.
isMinorCodeDirty(Integer) - Method in class org.apache.nutch.storage.ParseStatus
Checks the dirty status of the 'minorCode' field.
isMinorCodeDirty(Integer) - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Checks the dirty status of the 'minorCode' field.
isModeAccept() - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
isModifiedTimeDirty(Long) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'modifiedTime' field.
isModifiedTimeDirty(Long) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'modifiedTime' field.
isMultiValued(String) - Method in class org.apache.nutch.metadata.Metadata
Returns true if named value is multivalued.
isOutlinksDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host
Checks the dirty status of the 'outlinks' field.
isOutlinksDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host.Tombstone
Checks the dirty status of the 'outlinks' field.
isOutlinksDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'outlinks' field.
isOutlinksDirty(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'outlinks' field.
isParseStatusDirty(ParseStatus) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'parseStatus' field.
isParseStatusDirty(ParseStatus) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'parseStatus' field.
isPrevFetchTimeDirty(Long) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'prevFetchTime' field.
isPrevFetchTimeDirty(Long) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'prevFetchTime' field.
isPrevModifiedTimeDirty(Long) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'prevModifiedTime' field.
isPrevModifiedTimeDirty(Long) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'prevModifiedTime' field.
isPrevSignatureDirty(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'prevSignature' field.
isPrevSignatureDirty(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'prevSignature' field.
isProtocolStatusDirty(ProtocolStatus) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'protocolStatus' field.
isProtocolStatusDirty(ProtocolStatus) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'protocolStatus' field.
isRemoteVerificationEnabled() - Method in class org.apache.nutch.protocol.ftp.Client
Return whether or not verification of the remote host participating in data connections is enabled.
isReprUrlDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'reprUrl' field.
isReprUrlDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'reprUrl' field.
isRetriesSinceFetchDirty(Integer) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'retriesSinceFetch' field.
isRetriesSinceFetchDirty(Integer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'retriesSinceFetch' field.
isRunning() - Method in class org.apache.nutch.api.NutchServer
Convenience method to determine whether a Nutch server is running.
isSameDomainName(URL, URL) - Static method in class org.apache.nutch.util.URLUtil
Returns whether the given urls have the same domain name.
isSameDomainName(String, String) - Static method in class org.apache.nutch.util.URLUtil
Returns whether the given urls have the same domain name.
isScoreDirty(Float) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'score' field.
isScoreDirty(Float) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'score' field.
isServerTrusted(X509Certificate[]) - Method in class org.apache.nutch.protocol.httpclient.DummyX509TrustManager
 
isSignatureDirty(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'signature' field.
isSignatureDirty(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'signature' field.
isStatusDirty(Integer) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'status' field.
isStatusDirty(Integer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'status' field.
isSuccess(ParseStatus) - Static method in class org.apache.nutch.parse.ParseStatusUtils
 
isSuccess() - Method in class org.apache.nutch.storage.ProtocolStatus
A convenience method which returns a successful ProtocolStatus.
isTextDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'text' field.
isTextDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'text' field.
isTitleDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Checks the dirty status of the 'title' field.
isTitleDirty(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Checks the dirty status of the 'title' field.
isTruncated(String, WebPage) - Static method in class org.apache.nutch.parse.ParserJob
Checks if the page's content is truncated.
isWhiteSpace(char) - Static method in class org.apache.nutch.parse.html.XMLCharacterRecognizer
Returns whether the specified ch conforms to the XML 1.0 definition of whitespace.
isWhiteSpace(char[], int, int) - Static method in class org.apache.nutch.parse.html.XMLCharacterRecognizer
Tell if the string is whitespace.
isWhiteSpace(StringBuffer) - Static method in class org.apache.nutch.parse.html.XMLCharacterRecognizer
Tell if the string is whitespace.
isWhiteSpace(String) - Static method in class org.apache.nutch.parse.html.XMLCharacterRecognizer
Tell if the string is whitespace.
iterateOnSplits(byte[], byte[], int) - Static method in class org.apache.nutch.util.Bytes
Iterate over keys within the passed inclusive range.
iterator() - Method in class org.apache.nutch.indexer.NutchDocument
Iterate over all fields.

J

JobConfig - Class in org.apache.nutch.api.model.request
 
JobConfig() - Constructor for class org.apache.nutch.api.model.request.JobConfig
 
JobConfig - Class in org.apache.nutch.webui.client.model
 
JobConfig() - Constructor for class org.apache.nutch.webui.client.model.JobConfig
 
JobFactory - Class in org.apache.nutch.api.impl
 
JobFactory() - Constructor for class org.apache.nutch.api.impl.JobFactory
 
JobInfo - Class in org.apache.nutch.api.model.response
 
JobInfo(String, JobConfig, JobInfo.State, String) - Constructor for class org.apache.nutch.api.model.response.JobInfo
 
JobInfo - Class in org.apache.nutch.webui.client.model
 
JobInfo() - Constructor for class org.apache.nutch.webui.client.model.JobInfo
 
JobInfo.JobType - Enum in org.apache.nutch.webui.client.model
 
JobInfo.State - Enum in org.apache.nutch.api.model.response
 
JobInfo.State - Enum in org.apache.nutch.webui.client.model
 
JobManager - Interface in org.apache.nutch.api
 
jobManager - Variable in class org.apache.nutch.api.resources.AbstractResource
 
JobManager.JobType - Enum in org.apache.nutch.api
 
JobResource - Class in org.apache.nutch.api.resources
 
JobResource() - Constructor for class org.apache.nutch.api.resources.JobResource
 
JobWorker - Class in org.apache.nutch.api.impl
 
JobWorker(JobConfig, Configuration, NutchTool) - Constructor for class org.apache.nutch.api.impl.JobWorker
 
JSParseFilter - Class in org.apache.nutch.parse.js
This class is a heuristic link extractor for JavaScript files and code snippets.
JSParseFilter() - Constructor for class org.apache.nutch.parse.js.JSParseFilter
 

K

killJob() - Method in class org.apache.nutch.api.impl.JobWorker
 
killJob() - Method in class org.apache.nutch.util.NutchTool
Kill the job immediately.

L

LANGUAGE - Static variable in interface org.apache.nutch.metadata.DublinCore
A language of the intellectual content of the resource.
LanguageIndexingFilter - Class in org.apache.nutch.analysis.lang
An IndexingFilter that adds a lang (language) field to the document.
LanguageIndexingFilter() - Constructor for class org.apache.nutch.analysis.lang.LanguageIndexingFilter
Constructs a new Language Indexing Filter.
LAST_MODIFIED - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
leftPad(String, int) - Static method in class org.apache.nutch.util.StringUtil
Returns a copy of s padded with leading spaces so that its length is length.
LICENSE_LOCATION - Static variable in interface org.apache.nutch.metadata.CreativeCommons
 
LICENSE_URL - Static variable in interface org.apache.nutch.metadata.CreativeCommons
 
LinkAnalysisScoringFilter - Class in org.apache.nutch.scoring.link
 
LinkAnalysisScoringFilter() - Constructor for class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
list() - Method in interface org.apache.nutch.api.ConfManager
 
list() - Method in class org.apache.nutch.api.impl.RAMConfManager
 
list(String, JobInfo.State) - Method in class org.apache.nutch.api.impl.RAMJobManager
 
list(String, JobInfo.State) - Method in interface org.apache.nutch.api.JobManager
 
LOCATION - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
LockUtil - Class in org.apache.nutch.util
Utility methods for handling application-level locking.
LockUtil() - Constructor for class org.apache.nutch.util.LockUtil
 
LOG - Static variable in class org.apache.nutch.analysis.lang.HTMLLanguageParser
 
LOG - Static variable in class org.apache.nutch.crawl.DbUpdateMapper
 
LOG - Static variable in class org.apache.nutch.crawl.DbUpdateReducer
 
LOG - Static variable in class org.apache.nutch.crawl.DbUpdaterJob
 
LOG - Static variable in class org.apache.nutch.crawl.FetchScheduleFactory
 
LOG - Static variable in class org.apache.nutch.crawl.GeneratorJob
 
LOG - Static variable in class org.apache.nutch.crawl.InjectorJob
 
LOG - Static variable in class org.apache.nutch.crawl.WebTableReader
 
LOG - Static variable in class org.apache.nutch.fetcher.FetcherJob
 
LOG - Static variable in class org.apache.nutch.fetcher.FetcherReducer
 
LOG - Static variable in class org.apache.nutch.host.HostDb
 
LOG - Static variable in class org.apache.nutch.host.HostDbReader
 
LOG - Static variable in class org.apache.nutch.host.HostDbUpdateJob
 
LOG - Static variable in class org.apache.nutch.host.HostInjectorJob
 
LOG - Static variable in class org.apache.nutch.indexer.anchor.AnchorIndexingFilter
 
LOG - Static variable in class org.apache.nutch.indexer.basic.BasicIndexingFilter
 
LOG - Static variable in class org.apache.nutch.indexer.CleaningJob
 
LOG - Static variable in class org.apache.nutch.indexer.html.HtmlIndexingFilter
 
LOG - Static variable in class org.apache.nutch.indexer.IndexCleaningFilters
 
LOG - Static variable in class org.apache.nutch.indexer.IndexingFilters
 
LOG - Static variable in class org.apache.nutch.indexer.IndexingFiltersChecker
 
LOG - Static variable in class org.apache.nutch.indexer.IndexingJob
 
LOG - Static variable in class org.apache.nutch.indexer.IndexWriters
 
LOG - Static variable in class org.apache.nutch.indexer.more.MoreIndexingFilter
 
LOG - Static variable in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
LOG - Static variable in class org.apache.nutch.indexer.solr.SolrUtils
 
LOG - Static variable in class org.apache.nutch.indexer.subcollection.SubcollectionIndexingFilter
Logger
LOG - Static variable in class org.apache.nutch.indexer.tld.TLDIndexingFilter
 
LOG - Static variable in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
LOG - Static variable in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
LOG - Static variable in class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
LOG - Static variable in class org.apache.nutch.indexwriter.solr.SolrUtils
 
LOG - Static variable in class org.apache.nutch.microformats.reltag.RelTagParser
 
LOG - Static variable in class org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer
 
LOG - Static variable in class org.apache.nutch.net.URLNormalizers
 
LOG - Static variable in class org.apache.nutch.parse.html.HtmlParser
 
LOG - Static variable in class org.apache.nutch.parse.js.JSParseFilter
 
LOG - Static variable in class org.apache.nutch.parse.ParsePluginsReader
 
LOG - Static variable in class org.apache.nutch.parse.ParserChecker
 
LOG - Static variable in class org.apache.nutch.parse.ParserFactory
 
LOG - Static variable in class org.apache.nutch.parse.ParserJob
 
LOG - Static variable in class org.apache.nutch.parse.ParseUtil
 
LOG - Static variable in class org.apache.nutch.parse.tika.TikaParser
 
LOG - Static variable in class org.apache.nutch.plugin.PluginDescriptor
 
LOG - Static variable in class org.apache.nutch.plugin.PluginManifestParser
 
LOG - Static variable in class org.apache.nutch.plugin.PluginRepository
 
LOG - Static variable in class org.apache.nutch.protocol.file.File
 
LOG - Static variable in class org.apache.nutch.protocol.ftp.Ftp
 
LOG - Static variable in class org.apache.nutch.protocol.ftp.FtpRobotRulesParser
 
LOG - Static variable in class org.apache.nutch.protocol.http.api.HttpRobotRulesParser
 
LOG - Static variable in class org.apache.nutch.protocol.http.Http
 
LOG - Static variable in class org.apache.nutch.protocol.httpclient.Http
 
LOG - Static variable in class org.apache.nutch.protocol.httpclient.HttpAuthenticationFactory
 
LOG - Static variable in class org.apache.nutch.protocol.httpclient.HttpBasicAuthentication
 
LOG - Static variable in class org.apache.nutch.protocol.ProtocolFactory
 
LOG - Static variable in class org.apache.nutch.protocol.RobotRulesParser
 
LOG - Static variable in class org.apache.nutch.tools.arc.ArcRecordReader
 
LOG - Static variable in class org.apache.nutch.tools.DmozParser
 
LOG - Static variable in class org.apache.nutch.tools.ResolveUrls
 
LOG - Static variable in class org.apache.nutch.util.EncodingDetector
 
LOG - Static variable in class org.creativecommons.nutch.CCIndexingFilter
 
LOG - Static variable in class org.creativecommons.nutch.CCParseFilter
 
logConf() - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
LogDebugHandler - Class in org.apache.nutch.tools.proxy
 
LogDebugHandler() - Constructor for class org.apache.nutch.tools.proxy.LogDebugHandler
 
login(String, String) - Method in class org.apache.nutch.protocol.ftp.Client
Login to the FTP server using the provided username and password.
logout() - Method in class org.apache.nutch.protocol.ftp.Client
Logout of the FTP server by sending the QUIT command.
LogOutPage - Class in org.apache.nutch.webui.pages
 
LogOutPage() - Constructor for class org.apache.nutch.webui.pages.LogOutPage
 
longestMatch(String) - Method in class org.apache.nutch.util.PrefixStringMatcher
Returns the longest prefix of input that is matched, or null if no match exists.
longestMatch(String) - Method in class org.apache.nutch.util.SuffixStringMatcher
Returns the longest suffix of input that is matched, or null if no match exists.
longestMatch(String) - Method in class org.apache.nutch.util.TrieStringMatcher
Returns the longest substring of input that is matched by a pattern in the trie, or null if no match exists.

M

m_currentNode - Variable in class org.apache.nutch.parse.html.DOMBuilder
Current node
m_doc - Variable in class org.apache.nutch.parse.html.DOMBuilder
Root document
m_docFrag - Variable in class org.apache.nutch.parse.html.DOMBuilder
First node of document fragment or null if not a DocumentFragment
m_elemStack - Variable in class org.apache.nutch.parse.html.DOMBuilder
Vector of element nodes
m_inCData - Variable in class org.apache.nutch.parse.html.DOMBuilder
Flag indicating that we are processing a CData section
main(String[]) - Static method in class org.apache.nutch.api.NutchServer
 
main(String[]) - Static method in class org.apache.nutch.crawl.DbUpdaterJob
 
main(String[]) - Static method in class org.apache.nutch.crawl.GeneratorJob
 
main(String[]) - Static method in class org.apache.nutch.crawl.InjectorJob
 
main(String[]) - Static method in class org.apache.nutch.crawl.WebTableReader
 
main(String[]) - Static method in class org.apache.nutch.fetcher.FetcherJob
 
main(String[]) - Static method in class org.apache.nutch.host.HostDbReader
 
main(String[]) - Static method in class org.apache.nutch.host.HostDbUpdateJob
 
main(String[]) - Static method in class org.apache.nutch.host.HostInjectorJob
 
main(String[]) - Static method in class org.apache.nutch.indexer.CleaningJob
 
main(String[]) - Static method in class org.apache.nutch.indexer.IndexingFiltersChecker
 
main(String[]) - Static method in class org.apache.nutch.indexer.IndexingJob
 
main(String[]) - Static method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
main(String[]) - Static method in class org.apache.nutch.net.protocols.HttpDateFormat
 
main(String[]) - Static method in class org.apache.nutch.net.URLFilterChecker
 
main(String[]) - Static method in class org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer
 
main(String[]) - Static method in class org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer
Spits out patterns and substitutions that are in the configuration file.
main(String[]) - Static method in class org.apache.nutch.net.URLNormalizerChecker
 
main(String[]) - Static method in class org.apache.nutch.parse.html.HtmlParser
 
main(String[]) - Static method in class org.apache.nutch.parse.js.JSParseFilter
Main method which can be run from command line with the plugin option.
main(String[]) - Static method in class org.apache.nutch.parse.ParsePluginsReader
Tests parsing of the parse-plugins.xml file.
main(String[]) - Static method in class org.apache.nutch.parse.ParserChecker
 
main(String[]) - Static method in class org.apache.nutch.parse.ParserJob
 
main(String[]) - Static method in class org.apache.nutch.parse.tika.TikaParser
 
main(String[]) - Static method in class org.apache.nutch.plugin.PluginRepository
Loads all necessary dependencies for a selected plugin, and then runs one of the classes' main() method.
main(String[]) - Static method in class org.apache.nutch.protocol.Content
 
main(String[]) - Static method in class org.apache.nutch.protocol.file.File
Quick way for running this class.
main(String[]) - Static method in class org.apache.nutch.protocol.ftp.Ftp
For debugging.
main(HttpBase, String[]) - Static method in class org.apache.nutch.protocol.http.api.HttpBase
 
main(String[]) - Static method in class org.apache.nutch.protocol.http.Http
 
main(String[]) - Static method in class org.apache.nutch.protocol.httpclient.Http
Main method.
main(String[]) - Static method in class org.apache.nutch.protocol.RobotRulesParser
command-line main for testing
main(String[]) - Static method in class org.apache.nutch.storage.WebTableCreator
 
main(String[]) - Static method in class org.apache.nutch.tools.Benchmark
 
main(String[]) - Static method in class org.apache.nutch.tools.DmozParser
Command-line access.
main(String[]) - Static method in class org.apache.nutch.tools.proxy.TestbedProxy
 
main(String[]) - Static method in class org.apache.nutch.tools.ResolveUrls
Runs the resolve urls tool.
main(RegexURLFilterBase, String[]) - Static method in class org.apache.nutch.urlfilter.api.RegexURLFilterBase
Filter the standard input using a RegexURLFilterBase.
main(String[]) - Static method in class org.apache.nutch.urlfilter.automaton.AutomatonURLFilter
 
main(String[]) - Static method in class org.apache.nutch.urlfilter.prefix.PrefixURLFilter
 
main(String[]) - Static method in class org.apache.nutch.urlfilter.regex.RegexURLFilter
 
main(String[]) - Static method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
main(String[]) - Static method in class org.apache.nutch.util.CommandRunner
 
main(String[]) - Static method in class org.apache.nutch.util.domain.DomainStatistics
 
main(String[]) - Static method in class org.apache.nutch.util.PrefixStringMatcher
 
main(String[]) - Static method in class org.apache.nutch.util.StringUtil
 
main(String[]) - Static method in class org.apache.nutch.util.SuffixStringMatcher
 
main(String[]) - Static method in class org.apache.nutch.util.URLUtil
For testing
main(String[]) - Static method in class org.apache.nutch.webui.NutchUiServer
 
majorCodes - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
 
makeIOException(ElasticsearchException) - Static method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
makeIOException(SolrServerException) - Static method in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
makeStatus(int) - Static method in class org.apache.nutch.protocol.ProtocolStatusUtils
 
makeStatus(int, String) - Static method in class org.apache.nutch.protocol.ProtocolStatusUtils
 
makeStatus(int, URL) - Static method in class org.apache.nutch.protocol.ProtocolStatusUtils
 
map(String, WebPage, Mapper<String, WebPage, UrlWithScore, NutchWritable>.Context) - Method in class org.apache.nutch.crawl.DbUpdateMapper
 
map(String, WebPage, Mapper<String, WebPage, GeneratorJob.SelectorEntry, WebPage>.Context) - Method in class org.apache.nutch.crawl.GeneratorMapper
 
map(LongWritable, Text, Mapper<LongWritable, Text, String, WebPage>.Context) - Method in class org.apache.nutch.crawl.InjectorJob.UrlMapper
 
map(String, WebPage, Mapper<String, WebPage, Text, Text>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableRegexMapper
 
map(String, WebPage, Mapper<String, WebPage, Text, LongWritable>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableStatMapper
 
map(String, WebPage, Mapper<String, WebPage, IntWritable, FetchEntry>.Context) - Method in class org.apache.nutch.fetcher.FetcherJob.FetcherMapper
 
map(String, WebPage, Mapper<String, WebPage, Text, WebPage>.Context) - Method in class org.apache.nutch.host.HostDbUpdateJob.Mapper
 
map(LongWritable, Text, Mapper<LongWritable, Text, String, Host>.Context) - Method in class org.apache.nutch.host.HostInjectorJob.UrlMapper
 
map(String, WebPage, Mapper<String, WebPage, String, WebPage>.Context) - Method in class org.apache.nutch.indexer.CleaningJob.CleanMapper
 
map(String, WebPage, Mapper<String, WebPage, String, NutchDocument>.Context) - Method in class org.apache.nutch.indexer.IndexingJob.IndexerMapper
 
map(String, WebPage, Mapper<String, WebPage, String, WebPage>.Context) - Method in class org.apache.nutch.parse.ParserJob.ParserMapper
 
map(String, WebPage, Mapper<String, WebPage, Text, LongWritable>.Context) - Method in class org.apache.nutch.util.domain.DomainStatistics.DomainStatisticsMapper
 
mapCopyKey(String) - Method in class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
mapKey(String) - Method in class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
mapKey(byte[]) - Static method in class org.apache.nutch.util.Bytes
 
mapKey(byte[], int) - Static method in class org.apache.nutch.util.Bytes
 
MAPPING_FILE - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
MAPPING_FILE - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
Mark - Enum in org.apache.nutch.storage
 
match(String) - Method in class org.apache.nutch.urlfilter.api.RegexRule
Checks if a url matches this rule.
matchChar(TrieStringMatcher.TrieNode, String, int) - Method in class org.apache.nutch.util.TrieStringMatcher
Returns the next TrieStringMatcher.TrieNode visited, given that you are at node, and the next character in the input is the idx'th character of s.
matches(String) - Method in class org.apache.nutch.util.PrefixStringMatcher
Returns true if the given String is matched by a prefix in the trie
matches(String) - Method in class org.apache.nutch.util.SuffixStringMatcher
Returns true if the given String is matched by a suffix in the trie
matches(String) - Method in class org.apache.nutch.util.TrieStringMatcher
Returns true if the given String is matched by a pattern in the trie
MAX_BULK_DOCS - Static variable in interface org.apache.nutch.indexwriter.elastic.ElasticConstants
 
MAX_BULK_LENGTH - Static variable in interface org.apache.nutch.indexwriter.elastic.ElasticConstants
 
maxContent - Variable in class org.apache.nutch.protocol.http.api.HttpBase
The length limit for downloaded content, in bytes.
maxInterval - Variable in class org.apache.nutch.crawl.AbstractFetchSchedule
 
MD5Signature - Class in org.apache.nutch.crawl
Default implementation of a page signature.
MD5Signature() - Constructor for class org.apache.nutch.crawl.MD5Signature
 
Metadata - Class in org.apache.nutch.metadata
A multi-valued metadata container.
Metadata() - Constructor for class org.apache.nutch.metadata.Metadata
Constructs a new, empty metadata.
MetadataIndexer - Class in org.apache.nutch.indexer.metadata
Indexer which can be configured to extract metadata from the crawldb, parse metadata or content metadata.
MetadataIndexer() - Constructor for class org.apache.nutch.indexer.metadata.MetadataIndexer
 
MetaTagsParser - Class in org.apache.nutch.parse.metatags
Parse HTML meta tags (keywords, description) and store them in the parse metadata so that they can be indexed with the index-metadata plugin with the prefix 'metatag.'.
MetaTagsParser() - Constructor for class org.apache.nutch.parse.metatags.MetaTagsParser
 
MetaWrapper - Class in org.apache.nutch.metadata
This is a simple decorator that adds metadata to any Writable-s that can be serialized by NutchWritable.
MetaWrapper() - Constructor for class org.apache.nutch.metadata.MetaWrapper
 
MetaWrapper(Writable, Configuration) - Constructor for class org.apache.nutch.metadata.MetaWrapper
 
MetaWrapper(Metadata, Writable, Configuration) - Constructor for class org.apache.nutch.metadata.MetaWrapper
 
MimeUtil - Class in org.apache.nutch.util
 
MimeUtil(Configuration) - Constructor for class org.apache.nutch.util.MimeUtil
 
MIN_CONFIDENCE_KEY - Static variable in class org.apache.nutch.util.EncodingDetector
 
minorCodes - Static variable in class org.apache.nutch.parse.ParseStatusUtils
 
MissingDependencyException - Exception in org.apache.nutch.plugin
MissingDependencyException will be thrown if a plugin dependency cannot be found.
MissingDependencyException(Throwable) - Constructor for exception org.apache.nutch.plugin.MissingDependencyException
 
MissingDependencyException(String) - Constructor for exception org.apache.nutch.plugin.MissingDependencyException
 
model(T) - Method in class org.apache.nutch.webui.pages.components.CpmIteratorAdapter
 
MODIFIED - Static variable in interface org.apache.nutch.metadata.DublinCore
Date on which the resource was changed.
MoreIndexingFilter - Class in org.apache.nutch.indexer.more
Add (or reset) a few metaData properties as respective fields (if they are available), so that they can be accurately used within the search index.
MoreIndexingFilter() - Constructor for class org.apache.nutch.indexer.more.MoreIndexingFilter
 
MOVED - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Resource has moved permanently.

N

names() - Method in class org.apache.nutch.metadata.Metadata
Returns an array of the names contained in the metadata.
newBuilder() - Static method in class org.apache.nutch.storage.Host
Creates a new Host RecordBuilder
newBuilder(Host.Builder) - Static method in class org.apache.nutch.storage.Host
Creates a new Host RecordBuilder by copying an existing Builder
newBuilder(Host) - Static method in class org.apache.nutch.storage.Host
Creates a new Host RecordBuilder by copying an existing Host instance
newBuilder() - Static method in class org.apache.nutch.storage.ParseStatus
Creates a new ParseStatus RecordBuilder
newBuilder(ParseStatus.Builder) - Static method in class org.apache.nutch.storage.ParseStatus
Creates a new ParseStatus RecordBuilder by copying an existing Builder
newBuilder(ParseStatus) - Static method in class org.apache.nutch.storage.ParseStatus
Creates a new ParseStatus RecordBuilder by copying an existing ParseStatus instance
newBuilder() - Static method in class org.apache.nutch.storage.ProtocolStatus
Creates a new ProtocolStatus RecordBuilder
newBuilder(ProtocolStatus.Builder) - Static method in class org.apache.nutch.storage.ProtocolStatus
Creates a new ProtocolStatus RecordBuilder by copying an existing Builder
newBuilder(ProtocolStatus) - Static method in class org.apache.nutch.storage.ProtocolStatus
Creates a new ProtocolStatus RecordBuilder by copying an existing ProtocolStatus instance
newBuilder() - Static method in class org.apache.nutch.storage.WebPage
Creates a new WebPage RecordBuilder
newBuilder(WebPage.Builder) - Static method in class org.apache.nutch.storage.WebPage
Creates a new WebPage RecordBuilder by copying an existing Builder
newBuilder(WebPage) - Static method in class org.apache.nutch.storage.WebPage
Creates a new WebPage RecordBuilder by copying an existing WebPage instance
newInstance() - Method in class org.apache.nutch.storage.Host
 
newInstance() - Method in class org.apache.nutch.storage.ParseStatus
 
newInstance() - Method in class org.apache.nutch.storage.ProtocolStatus
 
newInstance() - Method in class org.apache.nutch.storage.WebPage
 
next() - Method in class org.apache.nutch.api.impl.db.DbIterator
 
next(Text, BytesWritable) - Method in class org.apache.nutch.tools.arc.ArcRecordReader
Returns true if the next record in the split is read into the key and value pair.
nextKeyValue() - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecordReader
 
nextNode() - Method in class org.apache.nutch.util.NodeWalker
Returns the next Node on the stack and pushes all of its children onto the stack, allowing us to walk the node tree without the use of recursion.
NO_THRESHOLD - Static variable in class org.apache.nutch.util.EncodingDetector
 
nodeChar - Variable in class org.apache.nutch.util.TrieStringMatcher.TrieNode
 
NodeWalker - Class in org.apache.nutch.util
A utility class that allows the walking of any DOM tree using a stack instead of recursion.
NodeWalker(Node) - Constructor for class org.apache.nutch.util.NodeWalker
Starts the Node tree from the root node.
normalize(String, String) - Method in class org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer
 
normalize(String, String) - Method in interface org.apache.nutch.net.URLNormalizer
 
normalize(String, String) - Method in class org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer
 
normalize(String, String) - Method in class org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer
 
normalize(String, String) - Method in class org.apache.nutch.net.URLNormalizers
Normalize
normalize() - Method in class org.apache.nutch.util.Histogram
 
NOTFETCHING - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Not fetching.
NOTFOUND - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Resource was not found.
NotFoundHandler - Class in org.apache.nutch.tools.proxy
 
NotFoundHandler() - Constructor for class org.apache.nutch.tools.proxy.NotFoundHandler
 
NOTMODIFIED - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Unchanged since the last fetch.
NOTPARSED - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
Parsing was not performed.
numJobs - Variable in class org.apache.nutch.util.NutchTool
 
Nutch - Interface in org.apache.nutch.metadata
A collection of Nutch internal metadata constants.
NUTCH_SERVER - Static variable in class org.apache.nutch.api.NutchServer
 
NutchClient - Interface in org.apache.nutch.webui.client
 
NutchClientFactory - Class in org.apache.nutch.webui.client
 
NutchClientFactory() - Constructor for class org.apache.nutch.webui.client.NutchClientFactory
 
NutchClientImpl - Class in org.apache.nutch.webui.client.impl
 
NutchClientImpl(NutchInstance) - Constructor for class org.apache.nutch.webui.client.impl.NutchClientImpl
 
NutchConfig - Class in org.apache.nutch.api.model.request
 
NutchConfig() - Constructor for class org.apache.nutch.api.model.request.NutchConfig
 
NutchConfig - Class in org.apache.nutch.webui.model
 
NutchConfig() - Constructor for class org.apache.nutch.webui.model.NutchConfig
 
NutchConfiguration - Class in org.apache.nutch.util
Utility to create Hadoop Configurations that include Nutch-specific resources.
NutchDocument - Class in org.apache.nutch.indexer
A NutchDocument is the unit of indexing.
NutchDocument() - Constructor for class org.apache.nutch.indexer.NutchDocument
 
nutchFetchIntervalMDName - Static variable in class org.apache.nutch.crawl.InjectorJob
metadata key reserved for setting a custom fetchInterval for a specific URL
NutchGuiConfiguration - Class in org.apache.nutch.webui.config
 
NutchGuiConfiguration() - Constructor for class org.apache.nutch.webui.config.NutchGuiConfiguration
 
NutchInstance - Class in org.apache.nutch.webui.model
 
NutchInstance() - Constructor for class org.apache.nutch.webui.model.NutchInstance
 
NutchInstanceService - Interface in org.apache.nutch.webui.service
 
NutchInstanceServiceImpl - Class in org.apache.nutch.webui.service.impl
 
NutchInstanceServiceImpl() - Constructor for class org.apache.nutch.webui.service.impl.NutchInstanceServiceImpl
 
NutchJob - Class in org.apache.nutch.util
A Job for Nutch jobs.
NutchJob(Configuration) - Constructor for class org.apache.nutch.util.NutchJob
NutchJob(Configuration, String) - Constructor for class org.apache.nutch.util.NutchJob
NutchJobConf - Class in org.apache.nutch.util
A JobConf for Nutch jobs.
NutchJobConf(Configuration) - Constructor for class org.apache.nutch.util.NutchJobConf
 
nutchScoreMDName - Static variable in class org.apache.nutch.crawl.InjectorJob
metadata key reserved for setting a custom score for a specific URL
NutchServer - Class in org.apache.nutch.api
 
NutchServer() - Constructor for class org.apache.nutch.api.NutchServer
Public constructor which accepts the port we wish to run the server on as well as the logging granularity.
NutchServerPoolExecutor - Class in org.apache.nutch.api.impl
 
NutchServerPoolExecutor(int, int, long, TimeUnit, BlockingQueue<Runnable>) - Constructor for class org.apache.nutch.api.impl.NutchServerPoolExecutor
 
NutchService - Interface in org.apache.nutch.webui.service
 
NutchServiceImpl - Class in org.apache.nutch.webui.service.impl
 
NutchServiceImpl() - Constructor for class org.apache.nutch.webui.service.impl.NutchServiceImpl
 
NutchStatus - Class in org.apache.nutch.api.model.response
 
NutchStatus() - Constructor for class org.apache.nutch.api.model.response.NutchStatus
 
NutchStatus - Class in org.apache.nutch.webui.client.model
 
NutchStatus() - Constructor for class org.apache.nutch.webui.client.model.NutchStatus
 
NutchTool - Class in org.apache.nutch.util
 
NutchTool() - Constructor for class org.apache.nutch.util.NutchTool
 
NutchUiApplication - Class in org.apache.nutch.webui
 
NutchUiApplication() - Constructor for class org.apache.nutch.webui.NutchUiApplication
 
NutchUiCssReference - Class in org.apache.nutch.webui.pages.assets
 
NutchUiServer - Class in org.apache.nutch.webui
 
NutchUiServer() - Constructor for class org.apache.nutch.webui.NutchUiServer
 
NutchWritable - Class in org.apache.nutch.crawl
 
NutchWritable() - Constructor for class org.apache.nutch.crawl.NutchWritable
 
NutchWritable(Writable) - Constructor for class org.apache.nutch.crawl.NutchWritable
 

O

ObjectCache - Class in org.apache.nutch.util
 
onCrawlError(Crawl, String) - Method in interface org.apache.nutch.webui.client.impl.CrawlingCycleListener
 
onCrawlError(Crawl, String) - Method in class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
onInitialize() - Method in class org.apache.nutch.webui.pages.components.ColorEnumLabel
 
open(Configuration) - Method in interface org.apache.nutch.indexer.IndexWriter
 
open(Configuration) - Method in class org.apache.nutch.indexer.IndexWriters
 
open(Configuration) - Method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
open(Configuration) - Method in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
OPICScoringFilter - Class in org.apache.nutch.scoring.opic
This plugin implements a variant of an Online Page Importance Computation (OPIC) score, described in this paper: Abiteboul, Serge and Preda, Mihai and Cobena, Gregory (2003), Adaptive On-Line Page Importance Computation.
OPICScoringFilter() - Constructor for class org.apache.nutch.scoring.opic.OPICScoringFilter
 
org.apache.nutch.analysis.lang - package org.apache.nutch.analysis.lang
Text document language identifier.
org.apache.nutch.api - package org.apache.nutch.api
REST API to run and control crawl jobs.
org.apache.nutch.api.impl - package org.apache.nutch.api.impl
Implementations of REST API interfaces.
org.apache.nutch.api.impl.db - package org.apache.nutch.api.impl.db
 
org.apache.nutch.api.misc - package org.apache.nutch.api.misc
 
org.apache.nutch.api.model.request - package org.apache.nutch.api.model.request
 
org.apache.nutch.api.model.response - package org.apache.nutch.api.model.response
 
org.apache.nutch.api.resources - package org.apache.nutch.api.resources
 
org.apache.nutch.collection - package org.apache.nutch.collection
Subcollection is a subset of an index.
org.apache.nutch.crawl - package org.apache.nutch.crawl
Crawl control code and tools to run the crawler.
org.apache.nutch.fetcher - package org.apache.nutch.fetcher
The Nutch robot.
org.apache.nutch.host - package org.apache.nutch.host
Host database to store metadata per host.
org.apache.nutch.indexer - package org.apache.nutch.indexer
Index content, configure and run indexing and cleaning jobs to add, update, and delete documents from an index.
org.apache.nutch.indexer.anchor - package org.apache.nutch.indexer.anchor
An indexing plugin for inbound anchor text.
org.apache.nutch.indexer.basic - package org.apache.nutch.indexer.basic
A basic indexing plugin, adds basic fields: url, host, title, content, etc.
org.apache.nutch.indexer.html - package org.apache.nutch.indexer.html
Index raw HTML content.
org.apache.nutch.indexer.metadata - package org.apache.nutch.indexer.metadata
Indexing filter to add document metadata to the index.
org.apache.nutch.indexer.more - package org.apache.nutch.indexer.more
A more indexing plugin, adds "more" index fields: last modified date, MIME type, content length.
org.apache.nutch.indexer.solr - package org.apache.nutch.indexer.solr
 
org.apache.nutch.indexer.subcollection - package org.apache.nutch.indexer.subcollection
Indexing filter to assign documents to subcollections.
org.apache.nutch.indexer.tld - package org.apache.nutch.indexer.tld
Top Level Domain Indexing plugin.
org.apache.nutch.indexwriter.elastic - package org.apache.nutch.indexwriter.elastic
Index writer plugin for Elasticsearch.
org.apache.nutch.indexwriter.solr - package org.apache.nutch.indexwriter.solr
Index writer plugin for Apache Solr.
org.apache.nutch.metadata - package org.apache.nutch.metadata
A Multi-valued Metadata container, and set of constant fields for Nutch Metadata.
org.apache.nutch.microformats.reltag - package org.apache.nutch.microformats.reltag
A microformats Rel-Tag Parser/Indexer/Querier plugin.
org.apache.nutch.net - package org.apache.nutch.net
Web-related interfaces: URL filters and normalizers.
org.apache.nutch.net.protocols - package org.apache.nutch.net.protocols
Helper classes related to the Protocol interface, see also org.apache.nutch.protocol.
org.apache.nutch.net.urlnormalizer.basic - package org.apache.nutch.net.urlnormalizer.basic
URL normalizer performing basic normalizations: remove default ports and dot segments in path.
org.apache.nutch.net.urlnormalizer.pass - package org.apache.nutch.net.urlnormalizer.pass
URL normalizer dummy which does not change URLs.
org.apache.nutch.net.urlnormalizer.regex - package org.apache.nutch.net.urlnormalizer.regex
URL normalizer with configurable rules based on regular expressions (Pattern).
org.apache.nutch.parse - package org.apache.nutch.parse
The Parse interface and related classes.
org.apache.nutch.parse.html - package org.apache.nutch.parse.html
An HTML document parsing plugin.
org.apache.nutch.parse.js - package org.apache.nutch.parse.js
Parser and parse filter plugin to extract all (possible) links from JavaScript files and embedded JavaScript code snippets.
org.apache.nutch.parse.metatags - package org.apache.nutch.parse.metatags
Parse filter to extract meta tags: keywords, description, etc.
org.apache.nutch.parse.tika - package org.apache.nutch.parse.tika
Parse various document formats with help of Apache Tika.
org.apache.nutch.plugin - package org.apache.nutch.plugin
The Nutch Plugin System.
org.apache.nutch.protocol - package org.apache.nutch.protocol
Classes related to the Protocol interface, see also org.apache.nutch.net.protocols.
org.apache.nutch.protocol.file - package org.apache.nutch.protocol.file
Protocol plugin which supports retrieving local file resources.
org.apache.nutch.protocol.ftp - package org.apache.nutch.protocol.ftp
Protocol plugin which supports retrieving documents via the ftp protocol.
org.apache.nutch.protocol.http - package org.apache.nutch.protocol.http
Protocol plugin which supports retrieving documents via the http protocol.
org.apache.nutch.protocol.http.api - package org.apache.nutch.protocol.http.api
Common API used by HTTP plugins (http, httpclient)
org.apache.nutch.protocol.httpclient - package org.apache.nutch.protocol.httpclient
Protocol plugin which supports retrieving documents via the HTTP and HTTPS protocols, optionally with Basic, Digest and NTLM authentication schemes for web server as well as proxy server.
org.apache.nutch.protocol.sftp - package org.apache.nutch.protocol.sftp
Protocol plugin which supports retrieving documents via the sftp protocol.
org.apache.nutch.scoring - package org.apache.nutch.scoring
The ScoringFilter interface.
org.apache.nutch.scoring.link - package org.apache.nutch.scoring.link
Scoring filter used in conjunction with org.apache.nutch.scoring.webgraph.WebGraph.
org.apache.nutch.scoring.opic - package org.apache.nutch.scoring.opic
Scoring filter implementing a variant of the Online Page Importance Computation (OPIC) algorithm.
org.apache.nutch.scoring.tld - package org.apache.nutch.scoring.tld
Top Level Domain Scoring plugin.
org.apache.nutch.storage - package org.apache.nutch.storage
Representation (web pages, host metadata) of data in abstracted storage.
org.apache.nutch.tools - package org.apache.nutch.tools
Miscellaneous tools.
org.apache.nutch.tools.arc - package org.apache.nutch.tools.arc
Tools to read the Arc file format.
org.apache.nutch.tools.proxy - package org.apache.nutch.tools.proxy
Proxy to benchmark the crawler.
org.apache.nutch.urlfilter.api - package org.apache.nutch.urlfilter.api
Generic URL filter library, abstracting away from regular expression implementations.
org.apache.nutch.urlfilter.automaton - package org.apache.nutch.urlfilter.automaton
URL filter plugin based on dk.brics.automaton Finite-State Automata for Java™.
org.apache.nutch.urlfilter.domain - package org.apache.nutch.urlfilter.domain
URL filter plugin to include only URLs which match an element in a given list of domain suffixes, domain names, and/or host names.
org.apache.nutch.urlfilter.prefix - package org.apache.nutch.urlfilter.prefix
URL filter plugin to include only URLs which match one of a given list of URL prefixes.
org.apache.nutch.urlfilter.regex - package org.apache.nutch.urlfilter.regex
URL filter plugin to include and/or exclude URLs matching Java regular expressions.
org.apache.nutch.urlfilter.suffix - package org.apache.nutch.urlfilter.suffix
URL filter plugin to either exclude or include only URLs which match one of the given (path) suffixes.
org.apache.nutch.urlfilter.validator - package org.apache.nutch.urlfilter.validator
URL filter plugin that validates given urls.
org.apache.nutch.util - package org.apache.nutch.util
Miscellaneous utility classes.
org.apache.nutch.util.domain - package org.apache.nutch.util.domain
Classes for domain name analysis.
org.apache.nutch.webui - package org.apache.nutch.webui
 
org.apache.nutch.webui.client - package org.apache.nutch.webui.client
 
org.apache.nutch.webui.client.impl - package org.apache.nutch.webui.client.impl
 
org.apache.nutch.webui.client.model - package org.apache.nutch.webui.client.model
 
org.apache.nutch.webui.config - package org.apache.nutch.webui.config
 
org.apache.nutch.webui.model - package org.apache.nutch.webui.model
 
org.apache.nutch.webui.pages - package org.apache.nutch.webui.pages
 
org.apache.nutch.webui.pages.assets - package org.apache.nutch.webui.pages.assets
 
org.apache.nutch.webui.pages.components - package org.apache.nutch.webui.pages.components
 
org.apache.nutch.webui.pages.crawls - package org.apache.nutch.webui.pages.crawls
 
org.apache.nutch.webui.pages.instances - package org.apache.nutch.webui.pages.instances
 
org.apache.nutch.webui.pages.menu - package org.apache.nutch.webui.pages.menu
 
org.apache.nutch.webui.pages.seed - package org.apache.nutch.webui.pages.seed
 
org.apache.nutch.webui.pages.settings - package org.apache.nutch.webui.pages.settings
 
org.apache.nutch.webui.service - package org.apache.nutch.webui.service
 
org.apache.nutch.webui.service.impl - package org.apache.nutch.webui.service.impl
 
org.creativecommons.nutch - package org.creativecommons.nutch
Sample plugins that parse and index Creative Commons metadata.
ORIGINAL_CHAR_ENCODING - Static variable in interface org.apache.nutch.metadata.Nutch
 
Outlink - Class in org.apache.nutch.parse
 
Outlink() - Constructor for class org.apache.nutch.parse.Outlink
 
Outlink(String, String) - Constructor for class org.apache.nutch.parse.Outlink
 
OutlinkExtractor - Class in org.apache.nutch.parse
Extractor to extract Outlinks / URLs from plain text using Regular Expressions.
OutlinkExtractor() - Constructor for class org.apache.nutch.parse.OutlinkExtractor
 

P

padHead(byte[], int) - Static method in class org.apache.nutch.util.Bytes
 
padTail(byte[], int) - Static method in class org.apache.nutch.util.Bytes
 
Pair<F,S> - Class in org.apache.nutch.util
 
Pair(F, S) - Constructor for class org.apache.nutch.util.Pair
 
parse(InputStream) - Method in class org.apache.nutch.collection.CollectionManager
 
Parse - Class in org.apache.nutch.parse
 
Parse() - Constructor for class org.apache.nutch.parse.Parse
 
Parse(String, String, Outlink[], ParseStatus) - Constructor for class org.apache.nutch.parse.Parse
 
parse(Configuration) - Method in class org.apache.nutch.parse.ParsePluginsReader
Reads the parse-plugins.xml file and returns the ParsePluginList defined by it.
parse(String, boolean, boolean) - Method in class org.apache.nutch.parse.ParserJob
 
parse(String, WebPage) - Method in class org.apache.nutch.parse.ParseUtil
Performs a parse by iterating through a List of preferred Parsers until a successful parse is performed and a Parse object is returned.
PARSE_KEY - Static variable in class org.apache.nutch.fetcher.FetcherJob
 
PARSE_META_PREFIX - Static variable in class org.apache.nutch.parse.metatags.MetaTagsParser
 
parseCharacterEncoding(CharSequence) - Static method in class org.apache.nutch.util.EncodingDetector
Parse the character encoding from the specified content type header.
parseDmozFile(File, int, boolean, int, Pattern, boolean) - Method in class org.apache.nutch.tools.DmozParser
Iterate through all the items in this structured DMOZ file.
ParseException - Exception in org.apache.nutch.parse
 
ParseException() - Constructor for exception org.apache.nutch.parse.ParseException
 
ParseException(String) - Constructor for exception org.apache.nutch.parse.ParseException
 
ParseException(String, Throwable) - Constructor for exception org.apache.nutch.parse.ParseException
 
ParseException(Throwable) - Constructor for exception org.apache.nutch.parse.ParseException
 
ParseFilter - Interface in org.apache.nutch.parse
Extension point for DOM-based parsers.
ParseFilters - Class in org.apache.nutch.parse
Creates and caches ParseFilter implementing plugins.
ParseFilters(Configuration) - Constructor for class org.apache.nutch.parse.ParseFilters
 
parseList(ArrayList<String>, String) - Method in class org.apache.nutch.collection.Subcollection
Create a list of patterns from a chunk of text; patterns are separated by newlines.
parsePluginFolder(String[]) - Method in class org.apache.nutch.plugin.PluginManifestParser
Returns a list of all found plugin descriptors.
ParsePluginList - Class in org.apache.nutch.parse
This class represents a natural ordering for which parsing plugin should get called for a particular mimeType.
ParsePluginsReader - Class in org.apache.nutch.parse
A reader to load the information stored in the $NUTCH_HOME/conf/parse-plugins.xml file.
ParsePluginsReader() - Constructor for class org.apache.nutch.parse.ParsePluginsReader
Constructs a new ParsePluginsReader
Parser - Interface in org.apache.nutch.parse
A parser for content generated by a Protocol implementation.
ParserChecker - Class in org.apache.nutch.parse
Parser checker, useful for testing parser.
ParserChecker() - Constructor for class org.apache.nutch.parse.ParserChecker
 
ParserFactory - Class in org.apache.nutch.parse
Creates and caches Parser plugins.
ParserFactory(Configuration) - Constructor for class org.apache.nutch.parse.ParserFactory
 
ParserJob - Class in org.apache.nutch.parse
 
ParserJob() - Constructor for class org.apache.nutch.parse.ParserJob
 
ParserJob(Configuration) - Constructor for class org.apache.nutch.parse.ParserJob
 
ParserJob.ParserMapper - Class in org.apache.nutch.parse
 
ParserJob.ParserMapper() - Constructor for class org.apache.nutch.parse.ParserJob.ParserMapper
 
ParserNotFound - Exception in org.apache.nutch.parse
 
ParserNotFound(String) - Constructor for exception org.apache.nutch.parse.ParserNotFound
 
ParserNotFound(String, String) - Constructor for exception org.apache.nutch.parse.ParserNotFound
 
ParserNotFound(String, String, String) - Constructor for exception org.apache.nutch.parse.ParserNotFound
 
parseRules(String, byte[], String, String) - Method in class org.apache.nutch.protocol.RobotRulesParser
Parses the robots content using the SimpleRobotRulesParser from crawler commons
ParseStatus - Class in org.apache.nutch.storage
 
ParseStatus() - Constructor for class org.apache.nutch.storage.ParseStatus
 
ParseStatus.Builder - Class in org.apache.nutch.storage
RecordBuilder for ParseStatus instances.
ParseStatus.Field - Enum in org.apache.nutch.storage
Enum containing all data bean's fields.
ParseStatus.Tombstone - Class in org.apache.nutch.storage
 
ParseStatusCodes - Interface in org.apache.nutch.parse
 
ParseStatusUtils - Class in org.apache.nutch.parse
 
ParseStatusUtils() - Constructor for class org.apache.nutch.parse.ParseStatusUtils
 
ParseUtil - Class in org.apache.nutch.parse
A Utility class containing methods to simply perform parsing utilities such as iterating through a preferred list of Parsers to obtain Parse objects.
ParseUtil(Configuration) - Constructor for class org.apache.nutch.parse.ParseUtil
 
PARTITION_MODE_DOMAIN - Static variable in class org.apache.nutch.crawl.URLPartitioner
 
PARTITION_MODE_HOST - Static variable in class org.apache.nutch.crawl.URLPartitioner
 
PARTITION_MODE_IP - Static variable in class org.apache.nutch.crawl.URLPartitioner
 
PARTITION_MODE_KEY - Static variable in class org.apache.nutch.crawl.URLPartitioner
 
PARTITION_URL_SEED - Static variable in class org.apache.nutch.crawl.URLPartitioner
 
PassURLNormalizer - Class in org.apache.nutch.net.urlnormalizer.pass
This URLNormalizer doesn't change urls.
PassURLNormalizer() - Constructor for class org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer
 
PASSWORD - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
PASSWORD - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
PERM_REFRESH_TIME - Static variable in class org.apache.nutch.fetcher.FetcherJob
 
Pluggable - Interface in org.apache.nutch.plugin
Defines the capability of a class to be plugged into Nutch.
Plugin - Class in org.apache.nutch.plugin
A nutch-plugin is a container for a set of custom logic that provides extensions to the nutch core functionality or another plugin that provides an API for extending.
Plugin(PluginDescriptor, Configuration) - Constructor for class org.apache.nutch.plugin.Plugin
Constructor
PluginClassLoader - Class in org.apache.nutch.plugin
The PluginClassLoader contains only classes of the runtime libraries set up in the plugin manifest file and the exported libraries of plugins that the plugin requires.
PluginClassLoader(URL[], ClassLoader) - Constructor for class org.apache.nutch.plugin.PluginClassLoader
Constructor
PluginDescriptor - Class in org.apache.nutch.plugin
The PluginDescriptor provides access to all meta information of a nutch-plugin, as well as to the internationalizable resources and the plugin's own classloader.
PluginDescriptor(String, String, String, String, String, String, Configuration) - Constructor for class org.apache.nutch.plugin.PluginDescriptor
Constructor
PluginManifestParser - Class in org.apache.nutch.plugin
The PluginManifestParser just parses the manifest files in all plugin directories.
PluginManifestParser(Configuration, PluginRepository) - Constructor for class org.apache.nutch.plugin.PluginManifestParser
 
PluginRepository - Class in org.apache.nutch.plugin
The plugin repository is a registry of all plugins.
PluginRepository(Configuration) - Constructor for class org.apache.nutch.plugin.PluginRepository
 
PluginRuntimeException - Exception in org.apache.nutch.plugin
PluginRuntimeException will be thrown when an exception in the plugin management occurs.
PluginRuntimeException(Throwable) - Constructor for exception org.apache.nutch.plugin.PluginRuntimeException
 
PluginRuntimeException(String) - Constructor for exception org.apache.nutch.plugin.PluginRuntimeException
 
PORT - Static variable in interface org.apache.nutch.indexwriter.elastic.ElasticConstants
 
pos - Variable in class org.apache.nutch.tools.arc.ArcRecordReader
 
PrefixStringMatcher - Class in org.apache.nutch.util
A class for efficiently matching Strings against a set of prefixes.
PrefixStringMatcher(String[]) - Constructor for class org.apache.nutch.util.PrefixStringMatcher
Creates a new PrefixStringMatcher which will match Strings with any prefix in the supplied array.
PrefixStringMatcher(Collection<String>) - Constructor for class org.apache.nutch.util.PrefixStringMatcher
Creates a new PrefixStringMatcher which will match Strings with any prefix in the supplied Collection.
PrefixURLFilter - Class in org.apache.nutch.urlfilter.prefix
Filters URLs based on a file of URL prefixes.
PrefixURLFilter() - Constructor for class org.apache.nutch.urlfilter.prefix.PrefixURLFilter
 
PrefixURLFilter(String) - Constructor for class org.apache.nutch.urlfilter.prefix.PrefixURLFilter
 
PrintCommandListener - Class in org.apache.nutch.protocol.ftp
This is a support class for logging all ftp command/reply traffic.
PrintCommandListener(Logger) - Constructor for class org.apache.nutch.protocol.ftp.PrintCommandListener
 
process(String, WebPage) - Method in class org.apache.nutch.parse.ParseUtil
Parses given web page and stores parsed content within page.
processDeflateEncoded(byte[], URL) - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
processDumpJob(String, Configuration, String, boolean, boolean, boolean, boolean) - Method in class org.apache.nutch.crawl.WebTableReader
 
processGzipEncoded(byte[], URL) - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
processingInstruction(String, String) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of a processing instruction.
processStatJob(boolean) - Method in class org.apache.nutch.crawl.WebTableReader
 
PROTO_NOT_FOUND - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
This protocol was not found.
PROTO_STATUS_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
Protocol - Interface in org.apache.nutch.protocol
A retriever of url content.
PROTOCOL_REDIR - Static variable in class org.apache.nutch.fetcher.FetcherJob
 
protocolCommandSent(ProtocolCommandEvent) - Method in class org.apache.nutch.protocol.ftp.PrintCommandListener
 
ProtocolException - Exception in org.apache.nutch.net.protocols
Deprecated.
Use org.apache.nutch.protocol.ProtocolException instead.
ProtocolException() - Constructor for exception org.apache.nutch.net.protocols.ProtocolException
Deprecated.
 
ProtocolException(String) - Constructor for exception org.apache.nutch.net.protocols.ProtocolException
Deprecated.
 
ProtocolException(String, Throwable) - Constructor for exception org.apache.nutch.net.protocols.ProtocolException
Deprecated.
 
ProtocolException(Throwable) - Constructor for exception org.apache.nutch.net.protocols.ProtocolException
Deprecated.
 
ProtocolException - Exception in org.apache.nutch.protocol
 
ProtocolException() - Constructor for exception org.apache.nutch.protocol.ProtocolException
 
ProtocolException(String) - Constructor for exception org.apache.nutch.protocol.ProtocolException
 
ProtocolException(String, Throwable) - Constructor for exception org.apache.nutch.protocol.ProtocolException
 
ProtocolException(Throwable) - Constructor for exception org.apache.nutch.protocol.ProtocolException
 
ProtocolFactory - Class in org.apache.nutch.protocol
Creates and caches Protocol plugins.
ProtocolFactory(Configuration) - Constructor for class org.apache.nutch.protocol.ProtocolFactory
 
ProtocolNotFound - Exception in org.apache.nutch.protocol
 
ProtocolNotFound(String) - Constructor for exception org.apache.nutch.protocol.ProtocolNotFound
 
ProtocolNotFound(String, String) - Constructor for exception org.apache.nutch.protocol.ProtocolNotFound
 
ProtocolOutput - Class in org.apache.nutch.protocol
Simple aggregate to pass from protocol plugins both content and protocol status.
ProtocolOutput(Content, ProtocolStatus) - Constructor for class org.apache.nutch.protocol.ProtocolOutput
 
ProtocolOutput(Content) - Constructor for class org.apache.nutch.protocol.ProtocolOutput
 
protocolReplyReceived(ProtocolCommandEvent) - Method in class org.apache.nutch.protocol.ftp.PrintCommandListener
 
ProtocolStatus - Class in org.apache.nutch.storage
 
ProtocolStatus() - Constructor for class org.apache.nutch.storage.ProtocolStatus
 
ProtocolStatus.Builder - Class in org.apache.nutch.storage
RecordBuilder for ProtocolStatus instances.
ProtocolStatus.Field - Enum in org.apache.nutch.storage
Enum containing all data bean's fields.
ProtocolStatus.Tombstone - Class in org.apache.nutch.storage
 
ProtocolStatusCodes - Interface in org.apache.nutch.protocol
 
ProtocolStatusUtils - Class in org.apache.nutch.protocol
 
ProtocolStatusUtils() - Constructor for class org.apache.nutch.protocol.ProtocolStatusUtils
 
proxyHost - Variable in class org.apache.nutch.protocol.http.api.HttpBase
The proxy hostname.
proxyPort - Variable in class org.apache.nutch.protocol.http.api.HttpBase
The proxy port.
PUBLISHER - Static variable in interface org.apache.nutch.metadata.DublinCore
An entity responsible for making the resource available.
put(String, Host) - Method in class org.apache.nutch.host.HostDb
 
put(int, Object) - Method in class org.apache.nutch.storage.Host
 
put(int, Object) - Method in class org.apache.nutch.storage.ParseStatus
 
put(int, Object) - Method in class org.apache.nutch.storage.ProtocolStatus
 
put(int, Object) - Method in class org.apache.nutch.storage.WebPage
 
putByte(byte[], int, byte) - Static method in class org.apache.nutch.util.Bytes
Write a single byte out to the specified byte array position.
putBytes(byte[], int, byte[], int, int) - Static method in class org.apache.nutch.util.Bytes
Put bytes at the specified byte array position.
putDouble(byte[], int, double) - Static method in class org.apache.nutch.util.Bytes
 
putFloat(byte[], int, float) - Static method in class org.apache.nutch.util.Bytes
 
putInt(byte[], int, int) - Static method in class org.apache.nutch.util.Bytes
Put an int value out to the specified byte array position.
putLong(byte[], int, long) - Static method in class org.apache.nutch.util.Bytes
Put a long value out to the specified byte array position.
putMark(WebPage, Utf8) - Method in enum org.apache.nutch.storage.Mark
 
putMark(WebPage, String) - Method in enum org.apache.nutch.storage.Mark
 
putShort(byte[], int, short) - Static method in class org.apache.nutch.util.Bytes
Put a short value out to the specified byte array position.

R

RAMConfManager - Class in org.apache.nutch.api.impl
 
RAMConfManager() - Constructor for class org.apache.nutch.api.impl.RAMConfManager
 
RAMJobManager - Class in org.apache.nutch.api.impl
 
RAMJobManager(JobFactory, NutchServerPoolExecutor, ConfManager) - Constructor for class org.apache.nutch.api.impl.RAMJobManager
 
randomBatchId() - Static method in class org.apache.nutch.crawl.GeneratorJob
Generate a random batch id
read(DataInput) - Static method in class org.apache.nutch.parse.Outlink
 
read(DataInput) - Static method in class org.apache.nutch.protocol.Content
 
readByteArray(DataInput) - Static method in class org.apache.nutch.util.Bytes
Read byte-array written with a WritableUtils.vint prefix.
readByteArrayThrowsRuntime(DataInput) - Static method in class org.apache.nutch.util.Bytes
Read byte-array written with a WritableUtils.vint prefix.
readConfiguration(Reader) - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
readFields(DataInput) - Method in class org.apache.nutch.crawl.GeneratorJob.SelectorEntry
 
readFields(DataInput) - Method in class org.apache.nutch.crawl.UrlWithScore
 
readFields(DataInput) - Method in class org.apache.nutch.fetcher.FetchEntry
 
readFields(DataInput) - Method in class org.apache.nutch.indexer.NutchDocument
 
readFields(DataInput) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputSplit
 
readFields(DataInput) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord
 
readFields(DataInput) - Method in class org.apache.nutch.metadata.Metadata
 
readFields(DataInput) - Method in class org.apache.nutch.metadata.MetaWrapper
 
readFields(DataInput) - Method in class org.apache.nutch.parse.Outlink
 
readFields(DataInput) - Method in class org.apache.nutch.protocol.Content
 
readFields(DataInput) - Method in class org.apache.nutch.scoring.ScoreDatum
 
readFields(DataInput) - Method in class org.apache.nutch.util.GenericWritableConfigurable
 
readFields(DataInput) - Method in class org.apache.nutch.util.WebPageWritable
 
readSolrDocument(SolrDocument) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord
 
readVLong(byte[], int) - Static method in class org.apache.nutch.util.Bytes
Reads a zero-compressed encoded long from input stream and returns it.
recordJobStatus(String, Job, Map<String, Object>) - Static method in class org.apache.nutch.util.ToolUtil
 
REDIR_EXCEEDED - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Too many redirects.
REDIRECT_DISCOVERED - Static variable in class org.apache.nutch.fetcher.FetcherJob
 
reduce(UrlWithScore, Iterable<NutchWritable>, Reducer<UrlWithScore, NutchWritable, String, WebPage>.Context) - Method in class org.apache.nutch.crawl.DbUpdateReducer
 
reduce(GeneratorJob.SelectorEntry, Iterable<WebPage>, Reducer<GeneratorJob.SelectorEntry, WebPage, String, WebPage>.Context) - Method in class org.apache.nutch.crawl.GeneratorReducer
 
reduce(Text, Iterable<LongWritable>, Reducer<Text, LongWritable, Text, LongWritable>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableStatCombiner
 
reduce(Text, Iterable<LongWritable>, Reducer<Text, LongWritable, Text, LongWritable>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableStatReducer
 
reduce(Text, Iterable<WebPage>, Reducer<Text, WebPage, String, Host>.Context) - Method in class org.apache.nutch.host.HostDbUpdateReducer
 
reduce(String, Iterable<WebPage>, Reducer<String, WebPage, NullWritable, NullWritable>.Context) - Method in class org.apache.nutch.indexer.CleaningJob.CleanReducer
 
reduce(Text, Iterable<SolrDeleteDuplicates.SolrRecord>, Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>.Context) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
reduce(Text, Iterable<LongWritable>, Reducer<Text, LongWritable, Text, LongWritable>.Context) - Method in class org.apache.nutch.util.domain.DomainStatistics.DomainStatisticsCombiner
 
reduce(Text, Iterable<LongWritable>, Reducer<Text, LongWritable, LongWritable, Text>.Context) - Method in class org.apache.nutch.util.domain.DomainStatistics.DomainStatisticsReducer
 
reduce(String, Iterable<WebPage>, Reducer<String, WebPage, String, WebPage>.Context) - Method in class org.apache.nutch.util.IdentityPageReducer
 
regexNormalize(String, String) - Method in class org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer
This function does the replacements by iterating through all the regex patterns.
RegexRule - Class in org.apache.nutch.urlfilter.api
A generic regular expression rule.
RegexRule(boolean, String) - Constructor for class org.apache.nutch.urlfilter.api.RegexRule
Constructs a new regular expression rule.
RegexURLFilter - Class in org.apache.nutch.urlfilter.regex
Filters URLs based on a file of regular expressions using the Java Regex implementation.
RegexURLFilter() - Constructor for class org.apache.nutch.urlfilter.regex.RegexURLFilter
 
RegexURLFilter(String) - Constructor for class org.apache.nutch.urlfilter.regex.RegexURLFilter
 
RegexURLFilterBase - Class in org.apache.nutch.urlfilter.api
Generic URL filter based on regular expressions.
RegexURLFilterBase() - Constructor for class org.apache.nutch.urlfilter.api.RegexURLFilterBase
Constructs a new empty RegexURLFilterBase
RegexURLFilterBase(File) - Constructor for class org.apache.nutch.urlfilter.api.RegexURLFilterBase
Constructs a new RegexURLFilter and inits it with a file of rules.
RegexURLFilterBase(String) - Constructor for class org.apache.nutch.urlfilter.api.RegexURLFilterBase
Constructs a new RegexURLFilter and inits it with a list of rules.
RegexURLFilterBase(Reader) - Constructor for class org.apache.nutch.urlfilter.api.RegexURLFilterBase
Constructs a new RegexURLFilter and inits it with a Reader of rules.
RegexURLNormalizer - Class in org.apache.nutch.net.urlnormalizer.regex
Allows users to do regex substitutions on all/any URLs that are encountered, which is useful for stripping session IDs from URLs.
RegexURLNormalizer() - Constructor for class org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer
The default constructor which is called from UrlNormalizerFactory (normalizerClass.newInstance()) in method: getNormalizer().
RegexURLNormalizer(Configuration) - Constructor for class org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer
 
RegexURLNormalizer(Configuration, String) - Constructor for class org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer
Constructor which can be passed the file name, so it doesn't look in the configuration files for it.
REL_TAG - Static variable in class org.apache.nutch.microformats.reltag.RelTagParser
 
RELATION - Static variable in interface org.apache.nutch.metadata.DublinCore
A reference to a related resource.
RelTagIndexingFilter - Class in org.apache.nutch.microformats.reltag
An IndexingFilter that adds tag field(s) to the document.
RelTagIndexingFilter() - Constructor for class org.apache.nutch.microformats.reltag.RelTagIndexingFilter
 
RelTagParser - Class in org.apache.nutch.microformats.reltag
Adds microformat rel-tags of document if found.
RelTagParser() - Constructor for class org.apache.nutch.microformats.reltag.RelTagParser
 
RemoteCommand - Class in org.apache.nutch.webui.client.impl
 
RemoteCommand(JobConfig) - Constructor for class org.apache.nutch.webui.client.impl.RemoteCommand
 
RemoteCommandBuilder - Class in org.apache.nutch.webui.client.impl
 
RemoteCommandExecutor - Class in org.apache.nutch.webui.client.impl
This class executes a remote job and waits for a success/failure result.
RemoteCommandExecutor(NutchClient) - Constructor for class org.apache.nutch.webui.client.impl.RemoteCommandExecutor
 
RemoteCommandExecutor.JobStateChecker - Class in org.apache.nutch.webui.client.impl
 
RemoteCommandExecutor.JobStateChecker(String) - Constructor for class org.apache.nutch.webui.client.impl.RemoteCommandExecutor.JobStateChecker
 
RemoteCommandsBatchFactory - Class in org.apache.nutch.webui.client.impl
 
RemoteCommandsBatchFactory() - Constructor for class org.apache.nutch.webui.client.impl.RemoteCommandsBatchFactory
 
remove(String, WebPage) - Method in interface org.apache.nutch.indexer.IndexCleaningFilter
 
remove(String, WebPage) - Method in class org.apache.nutch.indexer.IndexCleaningFilters
Run all defined filters.
remove(String) - Method in class org.apache.nutch.metadata.Metadata
Remove a metadata and all its associated values.
remove(String) - Method in class org.apache.nutch.metadata.SpellCheckedMetadata
 
removeField(String) - Method in class org.apache.nutch.indexer.NutchDocument
 
removeInstance(Long) - Method in class org.apache.nutch.webui.service.impl.NutchInstanceServiceImpl
 
removeInstance(Long) - Method in interface org.apache.nutch.webui.service.NutchInstanceService
 
removeLockFile(FileSystem, Path) - Static method in class org.apache.nutch.util.LockUtil
Remove lock file.
removeMark(WebPage) - Method in enum org.apache.nutch.storage.Mark
 
removeMarkIfExist(WebPage) - Method in enum org.apache.nutch.storage.Mark
Remove the mark only if the mark is present on the page.
replace(FileSystem, Path, Path, boolean) - Static method in class org.apache.nutch.util.FSUtils
Replaces the current path with the new path and if set removes the old path.
REPR_URL_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
reset() - Method in class org.apache.nutch.parse.HTMLMetaTags
Sets all boolean values to false.
resolveEncodingAlias(String) - Static method in class org.apache.nutch.util.EncodingDetector
 
resolveURL(URL, String) - Static method in class org.apache.nutch.util.URLUtil
Resolve relative URL-s and fix a java.net.URL error in handling of URLs with pure query targets.
ResolveUrls - Class in org.apache.nutch.tools
A simple tool that will spin up multiple threads to resolve URLs to IP addresses.
ResolveUrls(String) - Constructor for class org.apache.nutch.tools.ResolveUrls
Create a new ResolveUrls with a file from the local file system.
ResolveUrls(String, int) - Constructor for class org.apache.nutch.tools.ResolveUrls
Create a new ResolveUrls with a urls file and a number of threads for the Thread pool.
resolveUrls() - Method in class org.apache.nutch.tools.ResolveUrls
Creates a thread pool for resolving urls.
Response - Interface in org.apache.nutch.net.protocols
A response interface.
responseTime - Variable in class org.apache.nutch.protocol.http.api.HttpBase
Response Time
results - Variable in class org.apache.nutch.util.NutchTool
 
RESUME_KEY - Static variable in class org.apache.nutch.fetcher.FetcherJob
 
retrieveFile(String, OutputStream, int) - Method in class org.apache.nutch.protocol.ftp.Client
Retrieve a file for path
retrieveList(String, List<FTPFile>, int, FTPFileEntryParser) - Method in class org.apache.nutch.protocol.ftp.Client
Retrieve a list reply for path
RETRY - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Temporary failure.
reverseHost(String) - Static method in class org.apache.nutch.util.TableUtil
 
reverseUrl(String) - Static method in class org.apache.nutch.util.TableUtil
Reverses a url's domain.
reverseUrl(URL) - Static method in class org.apache.nutch.util.TableUtil
Reverses a url's domain.
rightPad(String, int) - Static method in class org.apache.nutch.util.StringUtil
Returns a copy of s padded with trailing spaces so that its length is length.
RIGHTS - Static variable in interface org.apache.nutch.metadata.DublinCore
Information about rights held in and over the resource.
RobotRules - Interface in org.apache.nutch.protocol
This class holds the rules which were parsed from a robots.txt file, and can test paths against those rules.
RobotRulesParser - Class in org.apache.nutch.protocol
This class uses crawler-commons for handling the parsing of robots.txt files.
RobotRulesParser() - Constructor for class org.apache.nutch.protocol.RobotRulesParser
 
RobotRulesParser(Configuration) - Constructor for class org.apache.nutch.protocol.RobotRulesParser
 
ROBOTS_DENIED - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Access denied by robots.txt rules.
root - Variable in class org.apache.nutch.util.TrieStringMatcher
 
run() - Method in class org.apache.nutch.api.impl.JobWorker
 
run(Map<String, Object>) - Method in class org.apache.nutch.crawl.DbUpdaterJob
 
run(String[]) - Method in class org.apache.nutch.crawl.DbUpdaterJob
 
run(Map<String, Object>) - Method in class org.apache.nutch.crawl.GeneratorJob
 
run(String[]) - Method in class org.apache.nutch.crawl.GeneratorJob
 
run(Map<String, Object>) - Method in class org.apache.nutch.crawl.InjectorJob
 
run(String[]) - Method in class org.apache.nutch.crawl.InjectorJob
 
run(String[]) - Method in class org.apache.nutch.crawl.WebTableReader
 
run(Map<String, Object>) - Method in class org.apache.nutch.crawl.WebTableReader
 
run(Map<String, Object>) - Method in class org.apache.nutch.fetcher.FetcherJob
 
run(String[]) - Method in class org.apache.nutch.fetcher.FetcherJob
 
run(Reducer<IntWritable, FetchEntry, String, WebPage>.Context) - Method in class org.apache.nutch.fetcher.FetcherReducer
 
run(String[]) - Method in class org.apache.nutch.host.HostDbReader
 
run(String[]) - Method in class org.apache.nutch.host.HostDbUpdateJob
 
run(String[]) - Method in class org.apache.nutch.host.HostInjectorJob
 
run(Map<String, Object>) - Method in class org.apache.nutch.indexer.CleaningJob
 
run(String[]) - Method in class org.apache.nutch.indexer.CleaningJob
 
run(String[]) - Method in class org.apache.nutch.indexer.IndexingFiltersChecker
 
run(Map<String, Object>) - Method in class org.apache.nutch.indexer.IndexingJob
 
run(String[]) - Method in class org.apache.nutch.indexer.IndexingJob
 
run(String[]) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
run(String[]) - Method in class org.apache.nutch.parse.ParserChecker
 
run(Map<String, Object>) - Method in class org.apache.nutch.parse.ParserJob
 
run(String[]) - Method in class org.apache.nutch.parse.ParserJob
 
run(String[]) - Method in class org.apache.nutch.tools.Benchmark
 
run(String[]) - Method in class org.apache.nutch.util.domain.DomainStatistics
 
run(Map<String, Object>) - Method in class org.apache.nutch.util.NutchTool
Runs the tool, using a map of arguments.
runQuery(DbFilter) - Method in class org.apache.nutch.api.impl.db.DbReader
 
runQuery(DbFilter) - Method in class org.apache.nutch.api.resources.DbResource
 

S

save() - Method in class org.apache.nutch.collection.CollectionManager
Save collections into file
save(SeedList) - Method in class org.apache.nutch.webui.service.impl.SeedListServiceImpl
 
save(SeedList) - Method in interface org.apache.nutch.webui.service.SeedListService
 
saveCrawl(Crawl) - Method in interface org.apache.nutch.webui.service.CrawlService
 
saveCrawl(Crawl) - Method in class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
saveDom(OutputStream, Element) - Static method in class org.apache.nutch.util.DomUtil
Saves the DOM into an output stream.
saveInstance(NutchInstance) - Method in class org.apache.nutch.webui.service.impl.NutchInstanceServiceImpl
 
saveInstance(NutchInstance) - Method in interface org.apache.nutch.webui.service.NutchInstanceService
 
SchedulingPage - Class in org.apache.nutch.webui.pages
 
SchedulingPage() - Constructor for class org.apache.nutch.webui.pages.SchedulingPage
 
SCHEMA$ - Static variable in class org.apache.nutch.storage.Host
 
SCHEMA$ - Static variable in class org.apache.nutch.storage.ParseStatus
 
SCHEMA$ - Static variable in class org.apache.nutch.storage.ProtocolStatus
 
SCHEMA$ - Static variable in class org.apache.nutch.storage.WebPage
 
SCOPE_CRAWLDB - Static variable in class org.apache.nutch.net.URLNormalizers
Scope used when updating the CrawlDb with new URLs.
SCOPE_DEFAULT - Static variable in class org.apache.nutch.net.URLNormalizers
Default scope.
SCOPE_FETCHER - Static variable in class org.apache.nutch.net.URLNormalizers
Scope used by FetcherJob when processing redirect URLs.
SCOPE_GENERATE_HOST_COUNT - Static variable in class org.apache.nutch.net.URLNormalizers
Scope used by GeneratorJob.
SCOPE_INJECT - Static variable in class org.apache.nutch.net.URLNormalizers
Scope used by InjectorJob.
SCOPE_LINKDB - Static variable in class org.apache.nutch.net.URLNormalizers
Scope used when updating the LinkDb with new URLs.
SCOPE_OUTLINK - Static variable in class org.apache.nutch.net.URLNormalizers
Scope used when constructing new Outlink instances.
SCOPE_PARTITION - Static variable in class org.apache.nutch.net.URLNormalizers
Scope used by URLPartitioner.
SCORE_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
ScoreDatum - Class in org.apache.nutch.scoring
 
ScoreDatum() - Constructor for class org.apache.nutch.scoring.ScoreDatum
 
ScoreDatum(float, String, String, int) - Constructor for class org.apache.nutch.scoring.ScoreDatum
 
ScoringFilter - Interface in org.apache.nutch.scoring
A contract defining behavior of scoring plugins.
ScoringFilterException - Exception in org.apache.nutch.scoring
Specialized exception for errors during scoring.
ScoringFilterException() - Constructor for exception org.apache.nutch.scoring.ScoringFilterException
 
ScoringFilterException(String) - Constructor for exception org.apache.nutch.scoring.ScoringFilterException
 
ScoringFilterException(String, Throwable) - Constructor for exception org.apache.nutch.scoring.ScoringFilterException
 
ScoringFilterException(Throwable) - Constructor for exception org.apache.nutch.scoring.ScoringFilterException
 
ScoringFilters - Class in org.apache.nutch.scoring
Creates and caches ScoringFilter implementing plugins.
ScoringFilters(Configuration) - Constructor for class org.apache.nutch.scoring.ScoringFilters
 
SearchPage - Class in org.apache.nutch.webui.pages
 
SearchPage() - Constructor for class org.apache.nutch.webui.pages.SearchPage
 
SECONDS_PER_DAY - Static variable in interface org.apache.nutch.crawl.FetchSchedule
 
SeedList - Class in org.apache.nutch.api.model.request
 
SeedList() - Constructor for class org.apache.nutch.api.model.request.SeedList
 
SeedList - Class in org.apache.nutch.webui.model
 
SeedList() - Constructor for class org.apache.nutch.webui.model.SeedList
 
SeedListService - Interface in org.apache.nutch.webui.service
 
SeedListServiceImpl - Class in org.apache.nutch.webui.service.impl
 
SeedListServiceImpl() - Constructor for class org.apache.nutch.webui.service.impl.SeedListServiceImpl
 
SeedListsPage - Class in org.apache.nutch.webui.pages.seed
This page is for seed lists management
SeedListsPage() - Constructor for class org.apache.nutch.webui.pages.seed.SeedListsPage
 
SeedPage - Class in org.apache.nutch.webui.pages.seed
This page is for seed urls management
SeedPage() - Constructor for class org.apache.nutch.webui.pages.seed.SeedPage
 
SeedPage(PageParameters) - Constructor for class org.apache.nutch.webui.pages.seed.SeedPage
 
SeedResource - Class in org.apache.nutch.api.resources
 
SeedResource() - Constructor for class org.apache.nutch.api.resources.SeedResource
 
SeedUrl - Class in org.apache.nutch.api.model.request
 
SeedUrl() - Constructor for class org.apache.nutch.api.model.request.SeedUrl
 
SeedUrl - Class in org.apache.nutch.webui.model
 
SeedUrl() - Constructor for class org.apache.nutch.webui.model.SeedUrl
 
sendNoOp() - Method in class org.apache.nutch.protocol.ftp.Client
Sends a NOOP command to the FTP server.
server - Variable in class org.apache.nutch.api.resources.AbstractResource
 
SERVER_URL - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
SERVER_URL - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
set(String, float) - Method in class org.apache.nutch.crawl.GeneratorJob.SelectorEntry
Sets url with score on this writable.
set(String, String) - Method in class org.apache.nutch.metadata.Metadata
Set metadata name/value.
set(String, String) - Method in class org.apache.nutch.metadata.SpellCheckedMetadata
 
setAll(Properties) - Method in class org.apache.nutch.metadata.Metadata
Copy All key-value pairs from properties.
setApplicationContext(ApplicationContext) - Method in class org.apache.nutch.webui.NutchUiApplication
 
setArgs(Map<String, Object>) - Method in class org.apache.nutch.api.model.request.JobConfig
 
setArgs(Map<String, Object>) - Method in class org.apache.nutch.api.model.response.JobInfo
 
setArgs(List<CharSequence>) - Method in class org.apache.nutch.storage.ParseStatus.Builder
Sets the value of the 'args' field
setArgs(List<CharSequence>) - Method in class org.apache.nutch.storage.ParseStatus
Sets the value of the 'args' field.
setArgs(List<CharSequence>) - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Sets the value of the 'args' field.
setArgs(List<CharSequence>) - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Sets the value of the 'args' field
setArgs(List<CharSequence>) - Method in class org.apache.nutch.storage.ProtocolStatus
Sets the value of the 'args' field.
setArgs(List<CharSequence>) - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Sets the value of the 'args' field.
setArgs(Map<String, Object>) - Method in class org.apache.nutch.webui.client.model.JobConfig
 
setArgs(Map<String, Object>) - Method in class org.apache.nutch.webui.client.model.JobInfo
 
setArgument(String, String) - Method in class org.apache.nutch.webui.client.model.JobConfig
 
setBaseHref(URL) - Method in class org.apache.nutch.parse.HTMLMetaTags
Sets the baseHref.
setBaseUrl(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'baseUrl' field
setBaseUrl(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'baseUrl' field.
setBaseUrl(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'baseUrl' field.
setBatchId(String) - Method in class org.apache.nutch.api.model.request.DbFilter
 
setBatchId(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'batchId' field
setBatchId(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'batchId' field.
setBatchId(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'batchId' field.
setBlackList(String) - Method in class org.apache.nutch.collection.Subcollection
Set contents of blacklist from String
setClazz(String) - Method in class org.apache.nutch.plugin.Extension
Sets the Class that implements the concrete extension and is only used until model creation at system start up.
setCode(int) - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Sets the value of the 'code' field
setCode(Integer) - Method in class org.apache.nutch.storage.ProtocolStatus
Sets the value of the 'code' field.
setCode(Integer) - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Sets the value of the 'code' field.
setCommand(String) - Method in class org.apache.nutch.util.CommandRunner
 
setConf(Configuration) - Method in class org.apache.nutch.analysis.lang.HTMLLanguageParser
 
setConf(Configuration) - Method in class org.apache.nutch.analysis.lang.LanguageIndexingFilter
 
setConf(Configuration) - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
 
setConf(Configuration) - Method in class org.apache.nutch.crawl.AdaptiveFetchSchedule
 
setConf(Configuration) - Method in class org.apache.nutch.crawl.URLPartitioner.FetchEntryPartitioner
 
setConf(Configuration) - Method in class org.apache.nutch.crawl.URLPartitioner.SelectorEntryPartitioner
 
setConf(Configuration) - Method in class org.apache.nutch.crawl.URLPartitioner
 
setConf(Configuration) - Method in class org.apache.nutch.host.HostDbUpdateJob
 
setConf(Configuration) - Method in class org.apache.nutch.host.HostInjectorJob
 
setConf(Configuration) - Method in class org.apache.nutch.indexer.anchor.AnchorIndexingFilter
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.indexer.basic.BasicIndexingFilter
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.indexer.CleaningJob
 
setConf(Configuration) - Method in class org.apache.nutch.indexer.html.HtmlIndexingFilter
 
setConf(Configuration) - Method in class org.apache.nutch.indexer.IndexingFiltersChecker
 
setConf(Configuration) - Method in class org.apache.nutch.indexer.metadata.MetadataIndexer
 
setConf(Configuration) - Method in class org.apache.nutch.indexer.more.MoreIndexingFilter
 
setConf(Configuration) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
setConf(Configuration) - Method in class org.apache.nutch.indexer.tld.TLDIndexingFilter
 
setConf(Configuration) - Method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter
 
setConf(Configuration) - Method in class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
setConf(Configuration) - Method in class org.apache.nutch.microformats.reltag.RelTagIndexingFilter
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.microformats.reltag.RelTagParser
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer
 
setConf(Configuration) - Method in class org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer
 
setConf(Configuration) - Method in class org.apache.nutch.parse.html.DOMContentUtils
 
setConf(Configuration) - Method in class org.apache.nutch.parse.html.HtmlParser
 
setConf(Configuration) - Method in class org.apache.nutch.parse.js.JSParseFilter
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.parse.metatags.MetaTagsParser
 
setConf(Configuration) - Method in class org.apache.nutch.parse.ParserChecker
 
setConf(Configuration) - Method in class org.apache.nutch.parse.ParserJob
 
setConf(Configuration) - Method in class org.apache.nutch.parse.ParseUtil
 
setConf(Configuration) - Method in class org.apache.nutch.parse.tika.DOMContentUtils
 
setConf(Configuration) - Method in class org.apache.nutch.parse.tika.TikaParser
 
setConf(Configuration) - Method in class org.apache.nutch.protocol.file.File
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.protocol.ftp.Ftp
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.protocol.http.api.HttpBase
 
setConf(Configuration) - Method in class org.apache.nutch.protocol.http.Http
 
setConf(Configuration) - Method in class org.apache.nutch.protocol.httpclient.Http
Reads the configuration from the Nutch configuration files and sets the configuration.
setConf(Configuration) - Method in class org.apache.nutch.protocol.httpclient.HttpAuthenticationFactory
 
setConf(Configuration) - Method in class org.apache.nutch.protocol.httpclient.HttpBasicAuthentication
 
setConf(Configuration) - Method in class org.apache.nutch.protocol.RobotRulesParser
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.protocol.sftp.Sftp
Set the Configuration object
setConf(Configuration) - Method in class org.apache.nutch.scoring.link.LinkAnalysisScoringFilter
 
setConf(Configuration) - Method in class org.apache.nutch.scoring.opic.OPICScoringFilter
 
setConf(Configuration) - Method in class org.apache.nutch.scoring.tld.TLDScoringFilter
 
setConf(Configuration) - Method in class org.apache.nutch.urlfilter.api.RegexURLFilterBase
 
setConf(Configuration) - Method in class org.apache.nutch.urlfilter.domain.DomainURLFilter
Sets the configuration.
setConf(Configuration) - Method in class org.apache.nutch.urlfilter.prefix.PrefixURLFilter
 
setConf(Configuration) - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
setConf(Configuration) - Method in class org.apache.nutch.urlfilter.validator.UrlValidator
 
setConf(Configuration) - Method in class org.apache.nutch.util.domain.DomainStatistics
 
setConf(Configuration) - Method in class org.apache.nutch.util.GenericWritableConfigurable
 
setConf(Configuration) - Method in class org.creativecommons.nutch.CCIndexingFilter
 
setConf(Configuration) - Method in class org.creativecommons.nutch.CCParseFilter
 
setConfId(String) - Method in class org.apache.nutch.api.model.request.JobConfig
 
setConfId(String) - Method in class org.apache.nutch.api.model.response.JobInfo
 
setConfId(String) - Method in class org.apache.nutch.webui.client.model.JobConfig
 
setConfId(String) - Method in class org.apache.nutch.webui.client.model.JobInfo
 
setConfigId(String) - Method in class org.apache.nutch.api.model.request.NutchConfig
 
setConfiguration(Set<String>) - Method in class org.apache.nutch.api.model.response.NutchStatus
 
setConfiguration(Set<String>) - Method in class org.apache.nutch.webui.client.model.NutchStatus
 
setConnectionStatus(ConnectionStatus) - Method in class org.apache.nutch.webui.model.NutchInstance
 
setContent(byte[]) - Method in class org.apache.nutch.protocol.Content
 
setContent(Content) - Method in class org.apache.nutch.protocol.ProtocolOutput
 
setContent(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'content' field
setContent(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'content' field.
setContent(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'content' field.
setContentType(String) - Method in class org.apache.nutch.protocol.Content
 
setContentType(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'contentType' field
setContentType(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'contentType' field.
setContentType(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'contentType' field.
setCrawlId(String) - Method in class org.apache.nutch.api.model.request.JobConfig
 
setCrawlId(String) - Method in class org.apache.nutch.api.model.response.JobInfo
 
setCrawlId(String) - Method in class org.apache.nutch.webui.client.model.Crawl
 
setCrawlId(String) - Method in class org.apache.nutch.webui.client.model.JobConfig
 
setCrawlId(String) - Method in class org.apache.nutch.webui.client.model.JobInfo
 
setCrawlName(String) - Method in class org.apache.nutch.webui.client.model.Crawl
 
setDataTimeout(int) - Method in class org.apache.nutch.protocol.ftp.Client
Sets the timeout in milliseconds to use for data connection.
setDatum(WebPage) - Method in class org.apache.nutch.crawl.URLWebPage
 
setDescriptor(PluginDescriptor) - Method in class org.apache.nutch.plugin.Extension
Sets the plugin descriptor and is only used until model creation at system start up.
setDocumentLocator(Locator) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive an object for locating the origin of SAX document events.
setEndKey(String) - Method in class org.apache.nutch.api.model.request.DbFilter
 
setFetchInterval(int) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'fetchInterval' field
setFetchInterval(Integer) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'fetchInterval' field.
setFetchInterval(Integer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'fetchInterval' field.
setFetchSchedule(String, WebPage, long, long, long, long, int) - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
Sets the fetchInterval and fetchTime on a successfully fetched page.
setFetchSchedule(String, WebPage, long, long, long, long, int) - Method in class org.apache.nutch.crawl.AdaptiveFetchSchedule
 
setFetchSchedule(String, WebPage, long, long, long, long, int) - Method in class org.apache.nutch.crawl.DefaultFetchSchedule
 
setFetchSchedule(String, WebPage, long, long, long, long, int) - Method in interface org.apache.nutch.crawl.FetchSchedule
Sets the fetchInterval and fetchTime on a successfully fetched page.
setFetchTime(long) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'fetchTime' field
setFetchTime(Long) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'fetchTime' field.
setFetchTime(Long) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'fetchTime' field.
setFields(Set<String>) - Method in class org.apache.nutch.api.model.request.DbFilter
 
setFileType(int) - Method in class org.apache.nutch.protocol.ftp.Client
Sets the file type to be transferred.
setFilterFromPath(boolean) - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
setFollowTalk(boolean) - Method in class org.apache.nutch.protocol.ftp.Ftp
Set followTalk
setForce(boolean) - Method in class org.apache.nutch.api.model.request.NutchConfig
 
setFParsePluginsFile(String) - Method in class org.apache.nutch.parse.ParsePluginsReader
 
setHeaders(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'headers' field
setHeaders(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'headers' field.
setHeaders(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'headers' field.
setHost(String) - Method in class org.apache.nutch.webui.model.NutchInstance
 
setId(Long) - Method in class org.apache.nutch.api.model.request.SeedList
 
setId(Long) - Method in class org.apache.nutch.api.model.request.SeedUrl
 
setId(String) - Method in class org.apache.nutch.api.model.response.JobInfo
 
setId(String) - Method in class org.apache.nutch.plugin.Extension
Sets the unique extension Id and is only used until model creation at system start up.
setId(Long) - Method in class org.apache.nutch.webui.client.model.Crawl
 
setId(String) - Method in class org.apache.nutch.webui.client.model.JobInfo
 
setId(Long) - Method in class org.apache.nutch.webui.model.NutchInstance
 
setId(Long) - Method in class org.apache.nutch.webui.model.SeedList
 
setId(Long) - Method in class org.apache.nutch.webui.model.SeedUrl
 
setIDAttribute(String, Element) - Method in class org.apache.nutch.parse.html.DOMBuilder
Set an ID string to node association in the ID table.
setIgnoreCase(boolean) - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
setInfo(JobInfo) - Method in class org.apache.nutch.api.impl.JobWorker
 
setInlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host.Builder
Sets the value of the 'inlinks' field
setInlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host
Sets the value of the 'inlinks' field.
setInlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host.Tombstone
Sets the value of the 'inlinks' field.
setInlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'inlinks' field
setInlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'inlinks' field.
setInlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'inlinks' field.
setInputStream(InputStream) - Method in class org.apache.nutch.util.CommandRunner
 
setInstances(List<NutchInstance>) - Method in class org.apache.nutch.webui.config.NutchGuiConfiguration
 
setJobClassName(String) - Method in class org.apache.nutch.api.model.request.JobConfig
 
setJobClassName(String) - Method in class org.apache.nutch.webui.client.model.JobConfig
 
setJobConfig(JobConfig) - Method in class org.apache.nutch.webui.client.impl.RemoteCommand
 
setJobInfo(JobInfo) - Method in class org.apache.nutch.webui.client.impl.RemoteCommand
 
setJobs(Collection<JobInfo>) - Method in class org.apache.nutch.api.model.response.NutchStatus
 
setJobs(Collection<JobInfo>) - Method in class org.apache.nutch.webui.client.model.NutchStatus
 
setKeepConnection(boolean) - Method in class org.apache.nutch.protocol.ftp.Ftp
Set keepConnection
setKeysReversed(boolean) - Method in class org.apache.nutch.api.model.request.DbFilter
 
setLastModified(long) - Method in class org.apache.nutch.storage.ProtocolStatus.Builder
Sets the value of the 'lastModified' field
setLastModified(Long) - Method in class org.apache.nutch.storage.ProtocolStatus
Sets the value of the 'lastModified' field.
setLastModified(Long) - Method in class org.apache.nutch.storage.ProtocolStatus.Tombstone
Sets the value of the 'lastModified' field.
setMajorCode(int) - Method in class org.apache.nutch.storage.ParseStatus.Builder
Sets the value of the 'majorCode' field
setMajorCode(Integer) - Method in class org.apache.nutch.storage.ParseStatus
Sets the value of the 'majorCode' field.
setMajorCode(Integer) - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Sets the value of the 'majorCode' field.
setMarkers(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'markers' field
setMarkers(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'markers' field.
setMarkers(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'markers' field.
setMaxContentLength(int) - Method in class org.apache.nutch.protocol.file.File
Set the point at which content is truncated.
setMaxContentLength(int) - Method in class org.apache.nutch.protocol.ftp.Ftp
Set the point at which content is truncated.
setMeta(String, String) - Method in class org.apache.nutch.metadata.MetaWrapper
Set metadata.
setMeta(String, byte[]) - Method in class org.apache.nutch.scoring.ScoreDatum
 
setMetadata(Metadata) - Method in class org.apache.nutch.protocol.Content
Other protocol-specific data.
setMetadata(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.Host.Builder
Sets the value of the 'metadata' field
setMetadata(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.Host
Sets the value of the 'metadata' field.
setMetadata(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.Host.Tombstone
Sets the value of the 'metadata' field.
setMetadata(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'metadata' field
setMetadata(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'metadata' field.
setMetadata(Map<CharSequence, ByteBuffer>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'metadata' field.
setMinorCode(int) - Method in class org.apache.nutch.storage.ParseStatus.Builder
Sets the value of the 'minorCode' field
setMinorCode(Integer) - Method in class org.apache.nutch.storage.ParseStatus
Sets the value of the 'minorCode' field.
setMinorCode(Integer) - Method in class org.apache.nutch.storage.ParseStatus.Tombstone
Sets the value of the 'minorCode' field.
setModeAccept(boolean) - Method in class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
setModel(IModel<Crawl>) - Method in class org.apache.nutch.webui.pages.crawls.CrawlPanel
 
setModel(IModel<NutchInstance>) - Method in class org.apache.nutch.webui.pages.instances.InstancePanel
 
setModifiedTime(long) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'modifiedTime' field
setModifiedTime(Long) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'modifiedTime' field.
setModifiedTime(Long) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'modifiedTime' field.
setMsg(String) - Method in class org.apache.nutch.api.model.response.JobInfo
 
setMsg(String) - Method in class org.apache.nutch.webui.client.model.JobInfo
 
setName(String) - Method in class org.apache.nutch.api.model.request.SeedList
 
setName(String) - Method in class org.apache.nutch.webui.model.NutchConfig
 
setName(String) - Method in class org.apache.nutch.webui.model.NutchInstance
 
setName(String) - Method in class org.apache.nutch.webui.model.SeedList
 
setNoCache() - Method in class org.apache.nutch.parse.HTMLMetaTags
Sets noCache to true.
setNoFollow() - Method in class org.apache.nutch.parse.HTMLMetaTags
Sets noFollow to true.
setNoIndex() - Method in class org.apache.nutch.parse.HTMLMetaTags
Sets noIndex to true.
setNumberOfRounds(Integer) - Method in class org.apache.nutch.webui.client.model.Crawl
 
setObject(String, Object) - Method in class org.apache.nutch.util.ObjectCache
 
setOutlinks(Outlink[]) - Method in class org.apache.nutch.parse.Parse
 
setOutlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host.Builder
Sets the value of the 'outlinks' field
setOutlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host
Sets the value of the 'outlinks' field.
setOutlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.Host.Tombstone
Sets the value of the 'outlinks' field.
setOutlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'outlinks' field
setOutlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'outlinks' field.
setOutlinks(Map<CharSequence, CharSequence>) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'outlinks' field.
setPageGoneSchedule(String, WebPage, long, long, long) - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
This method specifies how to schedule refetching of pages marked as GONE.
setPageGoneSchedule(String, WebPage, long, long, long) - Method in interface org.apache.nutch.crawl.FetchSchedule
This method specifies how to schedule refetching of pages marked as GONE.
setPageRetrySchedule(String, WebPage, long, long, long) - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
This method adjusts the fetch schedule if fetching needs to be re-tried due to transient errors.
setPageRetrySchedule(String, WebPage, long, long, long) - Method in interface org.apache.nutch.crawl.FetchSchedule
This method adjusts the fetch schedule if fetching needs to be re-tried due to transient errors.
setParams(Map<String, String>) - Method in class org.apache.nutch.api.model.request.NutchConfig
 
setParseStatus(ParseStatus) - Method in class org.apache.nutch.parse.Parse
 
setParseStatus(ParseStatus) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'parseStatus' field
setParseStatus(ParseStatus) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'parseStatus' field.
setParseStatus(ParseStatus) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'parseStatus' field.
setPassword(String) - Method in class org.apache.nutch.webui.model.NutchInstance
 
setPort(Integer) - Method in class org.apache.nutch.webui.model.NutchInstance
 
setPrevFetchTime(long) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'prevFetchTime' field
setPrevFetchTime(Long) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'prevFetchTime' field.
setPrevFetchTime(Long) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'prevFetchTime' field.
setPrevModifiedTime(long) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'prevModifiedTime' field
setPrevModifiedTime(Long) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'prevModifiedTime' field.
setPrevModifiedTime(Long) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'prevModifiedTime' field.
setPrevSignature(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'prevSignature' field
setPrevSignature(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'prevSignature' field.
setPrevSignature(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'prevSignature' field.
setProgress(int) - Method in class org.apache.nutch.webui.client.model.Crawl
 
setProperty(String, String, String) - Method in interface org.apache.nutch.api.ConfManager
 
setProperty(String, String, String) - Method in class org.apache.nutch.api.impl.RAMConfManager
 
setProtocolStatus(ProtocolStatus) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'protocolStatus' field
setProtocolStatus(ProtocolStatus) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'protocolStatus' field.
setProtocolStatus(ProtocolStatus) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'protocolStatus' field.
setRefresh(boolean) - Method in class org.apache.nutch.parse.HTMLMetaTags
Sets refresh to the supplied value.
setRefreshHref(URL) - Method in class org.apache.nutch.parse.HTMLMetaTags
Sets the refreshHref.
setRefreshTime(int) - Method in class org.apache.nutch.parse.HTMLMetaTags
Sets the refreshTime.
setRemoteVerificationEnabled(boolean) - Method in class org.apache.nutch.protocol.ftp.Client
Enable or disable verification that the remote host taking part in a data connection is the same as the host to which the control connection is attached.
setReprUrl(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'reprUrl' field
setReprUrl(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'reprUrl' field.
setReprUrl(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'reprUrl' field.
setRequestDelay(Duration) - Method in class org.apache.nutch.webui.client.impl.RemoteCommandExecutor
 
setResult(Map<String, Object>) - Method in class org.apache.nutch.api.model.response.JobInfo
 
setResult(Map<String, Object>) - Method in class org.apache.nutch.webui.client.model.JobInfo
 
setRetriesSinceFetch(int) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'retriesSinceFetch' field
setRetriesSinceFetch(Integer) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'retriesSinceFetch' field.
setRetriesSinceFetch(Integer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'retriesSinceFetch' field.
setRunningJobs(Collection<JobInfo>) - Method in class org.apache.nutch.api.model.response.NutchStatus
 
setRunningJobs(Collection<JobInfo>) - Method in class org.apache.nutch.webui.client.model.NutchStatus
 
setScore(FloatWritable) - Method in class org.apache.nutch.crawl.UrlWithScore
 
setScore(float) - Method in class org.apache.nutch.crawl.UrlWithScore
 
setScore(float) - Method in class org.apache.nutch.indexer.NutchDocument
 
setScore(float) - Method in class org.apache.nutch.scoring.ScoreDatum
 
setScore(float) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'score' field
setScore(Float) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'score' field.
setScore(Float) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'score' field.
setSeedDirectory(String) - Method in class org.apache.nutch.webui.client.model.Crawl
 
setSeedList(SeedList) - Method in class org.apache.nutch.api.model.request.SeedUrl
 
setSeedList(SeedList) - Method in class org.apache.nutch.webui.client.model.Crawl
 
setSeedList(SeedList) - Method in class org.apache.nutch.webui.model.SeedUrl
 
setSeedUrls(Collection<SeedUrl>) - Method in class org.apache.nutch.api.model.request.SeedList
 
setSeedUrls(Collection<SeedUrl>) - Method in class org.apache.nutch.webui.model.SeedList
 
setSignature(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'signature' field
setSignature(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'signature' field.
setSignature(ByteBuffer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'signature' field.
setStartDate(Date) - Method in class org.apache.nutch.api.model.response.NutchStatus
 
setStartDate(Date) - Method in class org.apache.nutch.webui.client.model.NutchStatus
 
setStartKey(String) - Method in class org.apache.nutch.api.model.request.DbFilter
 
setState(JobInfo.State) - Method in class org.apache.nutch.api.model.response.JobInfo
 
setState(JobInfo.State) - Method in class org.apache.nutch.webui.client.model.JobInfo
 
setStatus(ProtocolStatus) - Method in class org.apache.nutch.protocol.ProtocolOutput
 
setStatus(int) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'status' field
setStatus(Integer) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'status' field.
setStatus(Integer) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'status' field.
setStatus(Crawl.CrawlStatus) - Method in class org.apache.nutch.webui.client.model.Crawl
 
setStdErrorStream(OutputStream) - Method in class org.apache.nutch.util.CommandRunner
 
setStdOutputStream(OutputStream) - Method in class org.apache.nutch.util.CommandRunner
 
setText(String) - Method in class org.apache.nutch.parse.Parse
 
setText(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'text' field
setText(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'text' field.
setText(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'text' field.
setTimeout(int) - Method in class org.apache.nutch.protocol.ftp.Ftp
Set the timeout.
setTimeout(int) - Method in class org.apache.nutch.util.CommandRunner
 
setTimeout(Duration) - Method in class org.apache.nutch.webui.client.impl.RemoteCommand
 
SettingsPage - Class in org.apache.nutch.webui.pages.settings
 
SettingsPage() - Constructor for class org.apache.nutch.webui.pages.settings.SettingsPage
 
setTitle(String) - Method in class org.apache.nutch.parse.Parse
 
setTitle(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Builder
Sets the value of the 'title' field
setTitle(CharSequence) - Method in class org.apache.nutch.storage.WebPage
Sets the value of the 'title' field.
setTitle(CharSequence) - Method in class org.apache.nutch.storage.WebPage.Tombstone
Sets the value of the 'title' field.
setType(JobManager.JobType) - Method in class org.apache.nutch.api.model.request.JobConfig
 
setType(JobManager.JobType) - Method in class org.apache.nutch.api.model.response.JobInfo
 
setType(JobInfo.JobType) - Method in class org.apache.nutch.webui.client.model.JobConfig
 
setType(String) - Method in class org.apache.nutch.webui.client.model.JobInfo
 
setup(Mapper<String, WebPage, UrlWithScore, NutchWritable>.Context) - Method in class org.apache.nutch.crawl.DbUpdateMapper
 
setup(Reducer<UrlWithScore, NutchWritable, String, WebPage>.Context) - Method in class org.apache.nutch.crawl.DbUpdateReducer
 
setup(Mapper<String, WebPage, GeneratorJob.SelectorEntry, WebPage>.Context) - Method in class org.apache.nutch.crawl.GeneratorMapper
 
setup(Reducer<GeneratorJob.SelectorEntry, WebPage, String, WebPage>.Context) - Method in class org.apache.nutch.crawl.GeneratorReducer
 
setup(Mapper<LongWritable, Text, String, WebPage>.Context) - Method in class org.apache.nutch.crawl.InjectorJob.UrlMapper
 
setup(Mapper<String, WebPage, Text, Text>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableRegexMapper
 
setup(Reducer<Text, LongWritable, Text, LongWritable>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableStatCombiner
 
setup(Mapper<String, WebPage, Text, LongWritable>.Context) - Method in class org.apache.nutch.crawl.WebTableReader.WebTableStatMapper
 
setup(Mapper<String, WebPage, IntWritable, FetchEntry>.Context) - Method in class org.apache.nutch.fetcher.FetcherJob.FetcherMapper
 
setup(Mapper<LongWritable, Text, String, Host>.Context) - Method in class org.apache.nutch.host.HostInjectorJob.UrlMapper
 
setup(Mapper<String, WebPage, String, WebPage>.Context) - Method in class org.apache.nutch.indexer.CleaningJob.CleanMapper
 
setup(Reducer<String, WebPage, NullWritable, NullWritable>.Context) - Method in class org.apache.nutch.indexer.CleaningJob.CleanReducer
 
setup(Mapper<String, WebPage, String, NutchDocument>.Context) - Method in class org.apache.nutch.indexer.IndexingJob.IndexerMapper
 
setup(Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>.Context) - Method in class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
setup(Mapper<String, WebPage, String, WebPage>.Context) - Method in class org.apache.nutch.parse.ParserJob.ParserMapper
 
setup(Mapper<String, WebPage, Text, LongWritable>.Context) - Method in class org.apache.nutch.util.domain.DomainStatistics.DomainStatisticsMapper
 
setUrl(String) - Method in class org.apache.nutch.api.model.request.SeedUrl
 
setUrl(String) - Method in class org.apache.nutch.crawl.URLWebPage
 
setUrl(Text) - Method in class org.apache.nutch.crawl.UrlWithScore
 
setUrl(String) - Method in class org.apache.nutch.crawl.UrlWithScore
 
setUrl(String) - Method in class org.apache.nutch.scoring.ScoreDatum
 
setUrl(String) - Method in class org.apache.nutch.webui.model.SeedUrl
 
setUsername(String) - Method in class org.apache.nutch.webui.model.NutchInstance
 
setValue(String) - Method in class org.apache.nutch.webui.model.NutchConfig
 
setWaitForExit(boolean) - Method in class org.apache.nutch.util.CommandRunner
 
setWebPage(WebPage) - Method in class org.apache.nutch.util.WebPageWritable
 
setWhiteList(ArrayList<String>) - Method in class org.apache.nutch.collection.Subcollection
 
setWhiteList(String) - Method in class org.apache.nutch.collection.Subcollection
Set contents of whitelist from String
Sftp - Class in org.apache.nutch.protocol.sftp
This class uses the Jsch package to fetch content using the Sftp protocol.
Sftp() - Constructor for class org.apache.nutch.protocol.sftp.Sftp
 
shortestMatch(String) - Method in class org.apache.nutch.util.PrefixStringMatcher
Returns the shortest prefix of input that is matched, or null if no match exists.
shortestMatch(String) - Method in class org.apache.nutch.util.SuffixStringMatcher
Returns the shortest suffix of input that is matched, or null if no match exists.
shortestMatch(String) - Method in class org.apache.nutch.util.TrieStringMatcher
Returns the shortest substring of input that is matched by a pattern in the trie, or null if no match exists.
shouldFetch(String, WebPage, long) - Method in class org.apache.nutch.crawl.AbstractFetchSchedule
This method provides information whether the page is suitable for selection in the current fetchlist.
shouldFetch(String, WebPage, long) - Method in interface org.apache.nutch.crawl.FetchSchedule
This method provides information whether the page is suitable for selection in the current fetchlist.
shouldProcess(CharSequence, Utf8) - Static method in class org.apache.nutch.util.NutchJob
 
shutDown() - Method in class org.apache.nutch.plugin.Plugin
Shutdown the plugin.
Signature - Class in org.apache.nutch.crawl
 
Signature() - Constructor for class org.apache.nutch.crawl.Signature
 
SIGNATURE_KEY - Static variable in interface org.apache.nutch.metadata.Nutch
 
SignatureComparator - Class in org.apache.nutch.crawl
 
SignatureComparator() - Constructor for class org.apache.nutch.crawl.SignatureComparator
 
SignatureFactory - Class in org.apache.nutch.crawl
Factory class, which instantiates a Signature implementation according to the current Configuration.
size() - Method in class org.apache.nutch.metadata.Metadata
Returns the number of metadata names in this metadata.
SIZEOF_BOOLEAN - Static variable in class org.apache.nutch.util.Bytes
Size of boolean in bytes
SIZEOF_BYTE - Static variable in class org.apache.nutch.util.Bytes
Size of byte in bytes
SIZEOF_CHAR - Static variable in class org.apache.nutch.util.Bytes
Size of char in bytes
SIZEOF_DOUBLE - Static variable in class org.apache.nutch.util.Bytes
Size of double in bytes
SIZEOF_FLOAT - Static variable in class org.apache.nutch.util.Bytes
Size of float in bytes
SIZEOF_INT - Static variable in class org.apache.nutch.util.Bytes
Size of int in bytes
SIZEOF_LONG - Static variable in class org.apache.nutch.util.Bytes
Size of long in bytes
SIZEOF_SHORT - Static variable in class org.apache.nutch.util.Bytes
Size of short in bytes
skip(DataInput) - Static method in class org.apache.nutch.parse.Outlink
Skips over one Outlink in the input.
SKIP_TRUNCATED - Static variable in class org.apache.nutch.parse.ParserJob
 
skipChildren() - Method in class org.apache.nutch.util.NodeWalker
Skips over and removes from the node stack the children of the last node.
skippedEntity(String) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of a skipped entity.
SOLR_PREFIX - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
SOLR_PREFIX - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
SolrConstants - Interface in org.apache.nutch.indexer.solr
 
SolrConstants - Interface in org.apache.nutch.indexwriter.solr
 
SolrDeleteDuplicates - Class in org.apache.nutch.indexer.solr
Utility class for deleting duplicate documents from a solr index.
SolrDeleteDuplicates() - Constructor for class org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 
SolrDeleteDuplicates.SolrInputFormat - Class in org.apache.nutch.indexer.solr
 
SolrDeleteDuplicates.SolrInputFormat() - Constructor for class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputFormat
 
SolrDeleteDuplicates.SolrInputSplit - Class in org.apache.nutch.indexer.solr
 
SolrDeleteDuplicates.SolrInputSplit() - Constructor for class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputSplit
 
SolrDeleteDuplicates.SolrInputSplit(int, int) - Constructor for class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrInputSplit
 
SolrDeleteDuplicates.SolrRecord - Class in org.apache.nutch.indexer.solr
 
SolrDeleteDuplicates.SolrRecord() - Constructor for class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord
 
SolrDeleteDuplicates.SolrRecord(String, float, long) - Constructor for class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord
 
SolrDeleteDuplicates.SolrRecordReader - Class in org.apache.nutch.indexer.solr
 
SolrDeleteDuplicates.SolrRecordReader(SolrDocumentList, int) - Constructor for class org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecordReader
 
SolrIndexWriter - Class in org.apache.nutch.indexwriter.solr
 
SolrIndexWriter() - Constructor for class org.apache.nutch.indexwriter.solr.SolrIndexWriter
 
SolrMappingReader - Class in org.apache.nutch.indexwriter.solr
 
SolrMappingReader(Configuration) - Constructor for class org.apache.nutch.indexwriter.solr.SolrMappingReader
 
SolrUtils - Class in org.apache.nutch.indexer.solr
 
SolrUtils() - Constructor for class org.apache.nutch.indexer.solr.SolrUtils
 
SolrUtils - Class in org.apache.nutch.indexwriter.solr
 
SolrUtils() - Constructor for class org.apache.nutch.indexwriter.solr.SolrUtils
 
sortByValue() - Method in class org.apache.nutch.util.Histogram
 
sortInverseByValue() - Method in class org.apache.nutch.util.Histogram
 
SOURCE - Static variable in interface org.apache.nutch.metadata.DublinCore
A reference to a resource from which the present resource is derived.
SpellCheckedMetadata - Class in org.apache.nutch.metadata
A decorator to Metadata that adds spellchecking capabilities to property names.
SpellCheckedMetadata() - Constructor for class org.apache.nutch.metadata.SpellCheckedMetadata
 
split(byte[], byte[], int) - Static method in class org.apache.nutch.util.Bytes
Split passed range.
splitEnd - Variable in class org.apache.nutch.tools.arc.ArcRecordReader
 
splitLen - Variable in class org.apache.nutch.tools.arc.ArcRecordReader
 
splitStart - Variable in class org.apache.nutch.tools.arc.ArcRecordReader
 
SpringConfiguration - Class in org.apache.nutch.webui.config
 
SpringConfiguration() - Constructor for class org.apache.nutch.webui.config.SpringConfiguration
 
start() - Method in class org.apache.nutch.api.NutchServer
Starts the Nutch server printing some logging to the log file.
startCDATA() - Method in class org.apache.nutch.parse.html.DOMBuilder
Report the start of a CDATA section.
startCrawl(Long, NutchInstance) - Method in interface org.apache.nutch.webui.service.CrawlService
 
startCrawl(Long, NutchInstance) - Method in class org.apache.nutch.webui.service.impl.CrawlServiceImpl
 
startDocument() - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of the beginning of a document.
startDTD(String, String, String) - Method in class org.apache.nutch.parse.html.DOMBuilder
Report the start of DTD declarations, if any.
startElement(String, String, String, Attributes) - Method in class org.apache.nutch.parse.html.DOMBuilder
Receive notification of the beginning of an element.
startEntity(String) - Method in class org.apache.nutch.parse.html.DOMBuilder
Report the beginning of an entity.
startPrefixMapping(String, String) - Method in class org.apache.nutch.parse.html.DOMBuilder
Begin the scope of a prefix-URI Namespace mapping.
startsWith(byte[], byte[]) - Static method in class org.apache.nutch.util.Bytes
Return true if the byte array on the right is a prefix of the byte array on the left.
startUp() - Method in class org.apache.nutch.plugin.Plugin
Will be invoked at plugin start up.
STAT_COUNTERS - Static variable in interface org.apache.nutch.metadata.Nutch
Counters.
STAT_JOBS - Static variable in interface org.apache.nutch.metadata.Nutch
Jobs.
STAT_MESSAGE - Static variable in interface org.apache.nutch.metadata.Nutch
Status / result message.
STAT_PHASE - Static variable in interface org.apache.nutch.metadata.Nutch
Phase of processing.
STAT_PROGRESS - Static variable in interface org.apache.nutch.metadata.Nutch
Progress (float).
StatisticsPage - Class in org.apache.nutch.webui.pages
 
StatisticsPage() - Constructor for class org.apache.nutch.webui.pages.StatisticsPage
 
status - Variable in class org.apache.nutch.util.NutchTool
 
STATUS_BLOCKED - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_FAILED - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_FETCHED - Static variable in class org.apache.nutch.crawl.CrawlStatus
Page was successfully fetched.
STATUS_GONE - Static variable in class org.apache.nutch.crawl.CrawlStatus
Page no longer exists.
STATUS_GONE - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_MODIFIED - Static variable in interface org.apache.nutch.crawl.FetchSchedule
Page is known to have been modified since our last visit.
STATUS_NOTFETCHING - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_NOTFOUND - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_NOTMODIFIED - Static variable in class org.apache.nutch.crawl.CrawlStatus
Fetching successful - page is not modified.
STATUS_NOTMODIFIED - Static variable in interface org.apache.nutch.crawl.FetchSchedule
Page is known to remain unmodified since our last visit.
STATUS_NOTMODIFIED - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_REDIR_EXCEEDED - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_REDIR_PERM - Static variable in class org.apache.nutch.crawl.CrawlStatus
Page permanently redirects to other page.
STATUS_REDIR_TEMP - Static variable in class org.apache.nutch.crawl.CrawlStatus
Page temporarily redirects to other page.
STATUS_RETRY - Static variable in class org.apache.nutch.crawl.CrawlStatus
Fetching unsuccessful, needs to be retried (transient errors).
STATUS_RETRY - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_ROBOTS_DENIED - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_SUCCESS - Static variable in class org.apache.nutch.parse.ParseStatusUtils
 
STATUS_SUCCESS - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
STATUS_UNFETCHED - Static variable in class org.apache.nutch.crawl.CrawlStatus
Page was not fetched yet.
STATUS_UNKNOWN - Static variable in interface org.apache.nutch.crawl.FetchSchedule
It is unknown whether page was changed since our last visit.
STATUS_WOULDBLOCK - Static variable in class org.apache.nutch.protocol.ProtocolStatusUtils
 
stop(String, String) - Method in class org.apache.nutch.api.impl.RAMJobManager
 
stop(String, String) - Method in interface org.apache.nutch.api.JobManager
 
stop(boolean) - Method in class org.apache.nutch.api.NutchServer
Stop the Nutch server.
stop(boolean) - Method in class org.apache.nutch.api.resources.AdminResource
 
stop(String, String) - Method in class org.apache.nutch.api.resources.JobResource
 
stopJob() - Method in class org.apache.nutch.api.impl.JobWorker
 
stopJob() - Method in class org.apache.nutch.util.NutchTool
Stop the job with the possibility to resume.
StorageUtils - Class in org.apache.nutch.storage
Entry point to Gora store/mapreduce functionality.
StorageUtils() - Constructor for class org.apache.nutch.storage.StorageUtils
 
store - Variable in class org.apache.nutch.indexer.IndexingJob.IndexerMapper
 
StringUtil - Class in org.apache.nutch.util
A collection of String processing utility methods.
StringUtil() - Constructor for class org.apache.nutch.util.StringUtil
 
stripNonCharCodepoints(String) - Static method in class org.apache.nutch.indexer.solr.SolrUtils
 
stripNonCharCodepoints(String) - Static method in class org.apache.nutch.indexwriter.solr.SolrUtils
 
Subcollection - Class in org.apache.nutch.collection
SubCollection represents a subset of the index; you can define URL patterns that indicate that a particular page (URL) is part of the SubCollection.
Subcollection(String, String, Configuration) - Constructor for class org.apache.nutch.collection.Subcollection
public Constructor
Subcollection(Configuration) - Constructor for class org.apache.nutch.collection.Subcollection
 
SubcollectionIndexingFilter - Class in org.apache.nutch.indexer.subcollection
 
SubcollectionIndexingFilter() - Constructor for class org.apache.nutch.indexer.subcollection.SubcollectionIndexingFilter
 
SubcollectionIndexingFilter(Configuration) - Constructor for class org.apache.nutch.indexer.subcollection.SubcollectionIndexingFilter
 
SUBJECT - Static variable in interface org.apache.nutch.metadata.DublinCore
The topic of the content of the resource.
SUCCESS - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
Parsing succeeded.
SUCCESS - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Content was retrieved without errors.
SUCCESS_OK - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
 
SUCCESS_REDIRECT - Static variable in interface org.apache.nutch.parse.ParseStatusCodes
Parsed content contains a directive to redirect to another URL.
SuffixStringMatcher - Class in org.apache.nutch.util
A class for efficiently matching Strings against a set of suffixes.
SuffixStringMatcher(String[]) - Constructor for class org.apache.nutch.util.SuffixStringMatcher
Creates a new SuffixStringMatcher which will match Strings with any suffix in the supplied array.
SuffixStringMatcher(Collection<String>) - Constructor for class org.apache.nutch.util.SuffixStringMatcher
Creates a new SuffixStringMatcher which will match Strings with any suffix in the supplied Collection.
SuffixURLFilter - Class in org.apache.nutch.urlfilter.suffix
Filters URLs based on a file of URL suffixes.
SuffixURLFilter() - Constructor for class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 
SuffixURLFilter(Reader) - Constructor for class org.apache.nutch.urlfilter.suffix.SuffixURLFilter
 

T

TableUtil - Class in org.apache.nutch.util
 
TableUtil() - Constructor for class org.apache.nutch.util.TableUtil
 
TAG_BLACKLIST - Static variable in class org.apache.nutch.collection.Subcollection
 
TAG_COLLECTION - Static variable in class org.apache.nutch.collection.Subcollection
 
TAG_COLLECTIONS - Static variable in class org.apache.nutch.collection.Subcollection
 
TAG_ID - Static variable in class org.apache.nutch.collection.Subcollection
 
TAG_NAME - Static variable in class org.apache.nutch.collection.Subcollection
 
TAG_WHITELIST - Static variable in class org.apache.nutch.collection.Subcollection
 
tail(byte[], int) - Static method in class org.apache.nutch.util.Bytes
 
TEMP_MOVED - Static variable in interface org.apache.nutch.protocol.ProtocolStatusCodes
Resource has moved temporarily.
terminal - Variable in class org.apache.nutch.util.TrieStringMatcher.TrieNode
 
TestbedProxy - Class in org.apache.nutch.tools.proxy
 
TestbedProxy() - Constructor for class org.apache.nutch.tools.proxy.TestbedProxy
 
TextMD5Signature - Class in org.apache.nutch.crawl
Default implementation of a page signature.
TextMD5Signature() - Constructor for class org.apache.nutch.crawl.TextMD5Signature
 
TextProfileSignature - Class in org.apache.nutch.crawl
An implementation of a page signature.
TextProfileSignature() - Constructor for class org.apache.nutch.crawl.TextProfileSignature
 
THREADS_KEY - Static variable in class org.apache.nutch.fetcher.FetcherJob
 
throwBadRequestException(String) - Method in class org.apache.nutch.api.resources.AbstractResource
 
TikaParser - Class in org.apache.nutch.parse.tika
Wrapper for Tika parsers.
TikaParser() - Constructor for class org.apache.nutch.parse.tika.TikaParser
 
timeout - Variable in class org.apache.nutch.protocol.http.api.HttpBase
The network timeout in milliseconds.
TIMESTAMP_FIELD - Static variable in interface org.apache.nutch.indexer.solr.SolrConstants
 
TIMESTAMP_FIELD - Static variable in interface org.apache.nutch.indexwriter.solr.SolrConstants
 
TimingUtil - Class in org.apache.nutch.util
 
TimingUtil() - Constructor for class org.apache.nutch.util.TimingUtil
 
TITLE - Static variable in interface org.apache.nutch.metadata.DublinCore
A name given to the resource.
TLDIndexingFilter - Class in org.apache.nutch.indexer.tld
Adds the Top level domain extensions to the index
TLDIndexingFilter() - Constructor for class org.apache.nutch.indexer.tld.TLDIndexingFilter
 
TLDScoringFilter - Class in org.apache.nutch.scoring.tld
Scoring filter to boost tlds.
TLDScoringFilter() - Constructor for class org.apache.nutch.scoring.tld.TLDScoringFilter
 
tlsPreferredCipherSuites - Variable in class org.apache.nutch.protocol.http.api.HttpBase
Which TLS/SSL cipher suites to support
tlsPreferredProtocols - Variable in class org.apache.nutch.protocol.http.api.HttpBase
Which TLS/SSL protocols to support
toArgMap(Object...) - Static method in class org.apache.nutch.util.ToolUtil
 
toASCII(String) - Static method in class org.apache.nutch.util.URLUtil
 
toBinaryFromHex(byte) - Static method in class org.apache.nutch.util.Bytes
Takes an ASCII digit in the range A-F0-9 and returns the corresponding integer/ordinal value.
toBoolean(byte[]) - Static method in class org.apache.nutch.util.Bytes
toByteArrays(String[]) - Static method in class org.apache.nutch.util.Bytes
 
toByteArrays(String) - Static method in class org.apache.nutch.util.Bytes
 
toByteArrays(byte[]) - Static method in class org.apache.nutch.util.Bytes
 
toBytes(ByteBuffer) - Static method in class org.apache.nutch.util.Bytes
Returns a new byte array, copied from the passed ByteBuffer.
toBytes(String) - Static method in class org.apache.nutch.util.Bytes
Converts a string to a UTF-8 byte array.
toBytes(boolean) - Static method in class org.apache.nutch.util.Bytes
Convert a boolean to a byte array.
toBytes(long) - Static method in class org.apache.nutch.util.Bytes
Convert a long value to a byte array using big-endian.
toBytes(float) - Static method in class org.apache.nutch.util.Bytes
 
toBytes(double) - Static method in class org.apache.nutch.util.Bytes
Serialize a double as the IEEE 754 double format output.
toBytes(int) - Static method in class org.apache.nutch.util.Bytes
Convert an int value to a byte array
toBytes(short) - Static method in class org.apache.nutch.util.Bytes
Convert a short value to a byte array of Bytes.SIZEOF_SHORT bytes long.
toBytesBinary(String) - Static method in class org.apache.nutch.util.Bytes
 
toContent() - Method in class org.apache.nutch.protocol.file.FileResponse
 
toContent() - Method in class org.apache.nutch.protocol.ftp.FtpResponse
 
toDate(String) - Static method in class org.apache.nutch.net.protocols.HttpDateFormat
 
toDouble(byte[]) - Static method in class org.apache.nutch.util.Bytes
 
toDouble(byte[], int) - Static method in class org.apache.nutch.util.Bytes
 
toFloat(byte[]) - Static method in class org.apache.nutch.util.Bytes
Presumes float encoded as IEEE 754 floating-point "single format"
toFloat(byte[], int) - Static method in class org.apache.nutch.util.Bytes
Presumes float encoded as IEEE 754 floating-point "single format"
toHexString(ByteBuffer) - Static method in class org.apache.nutch.util.StringUtil
Convenience call for StringUtil.toHexString(ByteBuffer, String, int), where sep = null; lineLen = Integer.MAX_VALUE.
toHexString(ByteBuffer, String, int) - Static method in class org.apache.nutch.util.StringUtil
Get a text representation of a ByteBuffer as hexadecimal String, where each pair of hexadecimal digits corresponds to consecutive bytes in the array.
toHexString(byte[]) - Static method in class org.apache.nutch.util.StringUtil
Convenience call for StringUtil.toHexString(byte[], String, int), where sep = null; lineLen = Integer.MAX_VALUE.
toHexString(byte[], String, int) - Static method in class org.apache.nutch.util.StringUtil
Get a text representation of a byte[] as hexadecimal String, where each pair of hexadecimal digits corresponds to consecutive bytes in the array.
toHexString(byte[], int, int, String, int) - Static method in class org.apache.nutch.util.StringUtil
Get a text representation of a byte[] as hexadecimal String, where each pair of hexadecimal digits corresponds to consecutive bytes in the array.
toInt(byte[]) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to an int value
toInt(byte[], int) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to an int value
toInt(byte[], int, int) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to an int value
toLong(String) - Static method in class org.apache.nutch.net.protocols.HttpDateFormat
 
toLong(byte[]) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to a long value.
toLong(byte[], int) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to a long value.
toLong(byte[], int, int) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to a long value.
ToolUtil - Class in org.apache.nutch.util
 
ToolUtil() - Constructor for class org.apache.nutch.util.ToolUtil
 
TopLevelDomain - Class in org.apache.nutch.util.domain
(From Wikipedia) A top-level domain (TLD) is the last part of an Internet domain name; that is, the letters which follow the final dot of any domain name.
TopLevelDomain(String, TopLevelDomain.Type, DomainSuffix.Status, float) - Constructor for class org.apache.nutch.util.domain.TopLevelDomain
 
TopLevelDomain(String, DomainSuffix.Status, float, String) - Constructor for class org.apache.nutch.util.domain.TopLevelDomain
 
TopLevelDomain.Type - Enum in org.apache.nutch.util.domain
 
toShort(byte[]) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to a short value
toShort(byte[], int) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to a short value
toShort(byte[], int, int) - Static method in class org.apache.nutch.util.Bytes
Converts a byte array to a short value
toString() - Method in class org.apache.nutch.crawl.UrlWithScore
 
toString() - Method in class org.apache.nutch.fetcher.FetchEntry
 
toString() - Method in class org.apache.nutch.indexer.NutchDocument
A utility-like method which can easily be used to write any NutchDocument object to string for simple debugging.
toString() - Method in class org.apache.nutch.metadata.Metadata
 
toString(Date) - Static method in class org.apache.nutch.net.protocols.HttpDateFormat
Get the HTTP format of the specified date.
toString(Calendar) - Static method in class org.apache.nutch.net.protocols.HttpDateFormat
 
toString(long) - Static method in class org.apache.nutch.net.protocols.HttpDateFormat
 
toString() - Method in class org.apache.nutch.parse.html.DOMContentUtils.LinkParams
 
toString() - Method in class org.apache.nutch.parse.HTMLMetaTags
 
toString() - Method in class org.apache.nutch.parse.Outlink
 
toString(ParseStatus) - Static method in class org.apache.nutch.parse.ParseStatusUtils
 
toString() - Method in class org.apache.nutch.protocol.Content
 
toString(ProtocolStatus) - Static method in class org.apache.nutch.protocol.ProtocolStatusUtils
 
toString() - Method in class org.apache.nutch.scoring.ScoreDatum
 
toString() - Method in enum org.apache.nutch.storage.Host.Field
Gets field's attributes to string.
toString() - Method in enum org.apache.nutch.storage.ParseStatus.Field
Gets field's attributes to string.
toString() - Method in enum org.apache.nutch.storage.ProtocolStatus.Field
Gets field's attributes to string.
toString() - Method in enum org.apache.nutch.storage.WebPage.Field
Gets field's attributes to string.
toString() - Method in class org.apache.nutch.tools.Benchmark.BenchmarkResults
 
toString(ByteBuffer) - Static method in class org.apache.nutch.util.Bytes
This method will convert utf8 encoded bytes into a string.
toString(byte[]) - Static method in class org.apache.nutch.util.Bytes
 
toString(byte[], String, byte[]) - Static method in class org.apache.nutch.util.Bytes
Joins two byte arrays together using a separator.
toString(byte[], int, int) - Static method in class org.apache.nutch.util.Bytes
This method will convert utf8 encoded bytes into a string.
toString() - Method in class org.apache.nutch.util.domain.DomainSuffix
 
toString(List<E>) - Method in class org.apache.nutch.util.Histogram
 
toString(CharSequence) - Static method in class org.apache.nutch.util.TableUtil
Converts the given Utf8 instance to a String and cleans out any offending "�" from the String.
toString() - Method in class org.apache.nutch.webui.client.impl.RemoteCommand
 
toStringArray(Collection<WebPage.Field>) - Static method in class org.apache.nutch.storage.StorageUtils
 
toStringBinary(ByteBuffer) - Static method in class org.apache.nutch.util.Bytes
Write a printable representation of a ByteBuffer.
toStringBinary(byte[]) - Static method in class org.apache.nutch.util.Bytes
Write a printable representation of a byte array.
toStringBinary(byte[], int, int) - Static method in class org.apache.nutch.util.Bytes
Write a printable representation of a byte array.
toUNICODE(String) - Static method in class org.apache.nutch.util.URLUtil
 
TRANSFER_ENCODING - Static variable in interface org.apache.nutch.metadata.HttpHeaders
 
TrieStringMatcher - Class in org.apache.nutch.util
TrieStringMatcher is a base class for simple tree-based string matching.
TrieStringMatcher() - Constructor for class org.apache.nutch.util.TrieStringMatcher
 
TrieStringMatcher.TrieNode - Class in org.apache.nutch.util
Node class for the character tree.
TYPE - Static variable in interface org.apache.nutch.metadata.DublinCore
The nature or genre of the content of the resource.

U

unreverseHost(String) - Static method in class org.apache.nutch.util.TableUtil
 
unreverseUrl(String) - Static method in class org.apache.nutch.util.TableUtil
 
unzip(byte[]) - Static method in class org.apache.nutch.util.GZIPUtils
Returns a gunzipped copy of the input array.
unzipBestEffort(byte[]) - Static method in class org.apache.nutch.util.GZIPUtils
Returns a gunzipped copy of the input array.
unzipBestEffort(byte[], int) - Static method in class org.apache.nutch.util.GZIPUtils
Returns a gunzipped copy of the input array, truncated to sizeLimit bytes, if necessary.
update(String, String, String) - Method in class org.apache.nutch.api.resources.ConfigResource
 
update(NutchDocument) - Method in interface org.apache.nutch.indexer.IndexWriter
 
update(NutchDocument) - Method in class org.apache.nutch.indexer.IndexWriters
 
update(NutchDocument) - Method in class org.apache.nutch.indexwriter.elastic.ElasticIndexWriter