org.apache.nutch.crawl
Class LinkDb

java.lang.Object
  extended by org.apache.hadoop.conf.Configured
      extended by org.apache.nutch.crawl.LinkDb
All Implemented Interfaces:
Closeable, Configurable, JobConfigurable, Mapper<Text,ParseData,Text,Inlinks>, Tool

public class LinkDb
extends Configured
implements Tool, Mapper<Text,ParseData,Text,Inlinks>

Maintains an inverted link map, listing the incoming links for each URL.


Field Summary
static String CURRENT_NAME
           
static String IGNORE_INTERNAL_LINKS
           
static String LOCK_NAME
           
static org.slf4j.Logger LOG
           
 
Constructor Summary
LinkDb()
           
LinkDb(Configuration conf)
           
 
Method Summary
 void close()
           
 void configure(JobConf job)
           
static void install(JobConf job, Path linkDb)
           
 void invert(Path linkDb, Path[] segments, boolean normalize, boolean filter, boolean force)
           
 void invert(Path linkDb, Path segmentsDir, boolean normalize, boolean filter, boolean force)
           
static void main(String[] args)
           
 void map(Text key, ParseData parseData, OutputCollector<Text,Inlinks> output, Reporter reporter)
           
 int run(String[] args)
           
 
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface org.apache.hadoop.conf.Configurable
getConf, setConf
 

Field Detail

LOG

public static final org.slf4j.Logger LOG

IGNORE_INTERNAL_LINKS

public static final String IGNORE_INTERNAL_LINKS
See Also:
Constant Field Values

CURRENT_NAME

public static final String CURRENT_NAME
See Also:
Constant Field Values

LOCK_NAME

public static final String LOCK_NAME
See Also:
Constant Field Values
Constructor Detail

LinkDb

public LinkDb()

LinkDb

public LinkDb(Configuration conf)
Method Detail

configure

public void configure(JobConf job)
Specified by:
configure in interface JobConfigurable

close

public void close()
Specified by:
close in interface Closeable

map

public void map(Text key,
                ParseData parseData,
                OutputCollector<Text,Inlinks> output,
                Reporter reporter)
         throws IOException
Specified by:
map in interface Mapper<Text,ParseData,Text,Inlinks>
Throws:
IOException

invert

public void invert(Path linkDb,
                   Path segmentsDir,
                   boolean normalize,
                   boolean filter,
                   boolean force)
            throws IOException
Throws:
IOException

invert

public void invert(Path linkDb,
                   Path[] segments,
                   boolean normalize,
                   boolean filter,
                   boolean force)
            throws IOException
Throws:
IOException

install

public static void install(JobConf job,
                           Path linkDb)
                    throws IOException
Throws:
IOException

main

public static void main(String[] args)
                 throws Exception
Throws:
Exception

run

public int run(String[] args)
        throws Exception
Specified by:
run in interface Tool
Throws:
Exception


Copyright © 2012 The Apache Software Foundation