public class PairMutualInformation extends TextByteByteStage
Modifier and Type | Field and Description |
---|---|
static String |
PAIR_STATS_FILE
The location of the statistics file
|
static String |
PAIRMI_DIR
The pairMI output directory
|
static String |
TIMEDELTA
The time delta between time periods
|
static String |
TIMEPERIOD_COUNT_OUTPUT_ROOT
The root directory where timeperiod pair counts will be stored
|
static String |
TIMEPERIOD_OUTPUT_NAME
Name of the timeperiod count directory
|
Constructor and Description |
---|
PairMutualInformation(String[] nonHadoopArgs,
long timedelta) |
Modifier and Type | Method and Description |
---|---|
Class<? extends org.apache.hadoop.mapreduce.Reducer<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>> |
combiner()
By default this method returns the
IdentityReducer class. |
void |
finished(org.apache.hadoop.mapreduce.Job job)
Called when the stage's job is completed.
|
static WritablePairEnum |
loadStats(org.apache.hadoop.fs.Path outpath)
Load the PointwisePMI stats file from an output location (Path: outpath/PAIR_STATS_FILE) |
static Map<Long,Long> |
loadTimeCounts(org.apache.hadoop.fs.Path pairmiloc)
Load the total pairs seen in every time period from the pairmi location
provided
|
Class<PairEmit> |
mapper()
By default this method returns the
IdentityMapper class. |
String |
outname() |
Class<? extends org.apache.hadoop.mapreduce.Reducer<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>> |
reducer()
By default this method returns the
IdentityReducer class. |
void |
setup(org.apache.hadoop.mapreduce.Job job)
Add any final adjustments to the job's config
|
org.apache.hadoop.mapreduce.Job |
stage(org.apache.hadoop.fs.Path[] inputs,
org.apache.hadoop.fs.Path output,
org.apache.hadoop.conf.Configuration conf) |
lzoCompress, setCombinerClass, setMapperClass, setReducerClass
public static final String TIMEDELTA
public static final String PAIR_STATS_FILE
public static final String PAIRMI_DIR
public static final String TIMEPERIOD_COUNT_OUTPUT_ROOT
public static final String TIMEPERIOD_OUTPUT_NAME
public PairMutualInformation(String[] nonHadoopArgs, long timedelta)
nonHadoopArgs
- the arguments for configuration
timedelta
- the time delta between time periods
public void setup(org.apache.hadoop.mapreduce.Job job) throws IOException
Stage
setup
in class Stage<org.apache.hadoop.mapreduce.lib.input.TextInputFormat,org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>
IOException
public Class<PairEmit> mapper()
Stage
By default this method returns the IdentityMapper class. This
mapper outputs the values handed as they are.
mapper
in class Stage<org.apache.hadoop.mapreduce.lib.input.TextInputFormat,org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>
public Class<? extends org.apache.hadoop.mapreduce.Reducer<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>> combiner()
Stage
By default this method returns the IdentityReducer class. This
combiner outputs the values handed as they are.
combiner
in class Stage<org.apache.hadoop.mapreduce.lib.input.TextInputFormat,org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>
public org.apache.hadoop.mapreduce.Job stage(org.apache.hadoop.fs.Path[] inputs, org.apache.hadoop.fs.Path output, org.apache.hadoop.conf.Configuration conf) throws Exception
stage
in class Stage<org.apache.hadoop.mapreduce.lib.input.TextInputFormat,org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>
inputs
- the input paths to be expected
output
- the output location
conf
- the job configuration
Exception
IOException
public Class<? extends org.apache.hadoop.mapreduce.Reducer<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>> reducer()
Stage
By default this method returns the IdentityReducer class. This
reducer outputs the values handed as they are.
reducer
in class Stage<org.apache.hadoop.mapreduce.lib.input.TextInputFormat,org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>
public String outname()
outname
in class Stage<org.apache.hadoop.mapreduce.lib.input.TextInputFormat,org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>
public void finished(org.apache.hadoop.mapreduce.Job job)
Stage
finished
in class Stage<org.apache.hadoop.mapreduce.lib.input.TextInputFormat,org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable>
public static WritablePairEnum loadStats(org.apache.hadoop.fs.Path outpath) throws IOException
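Load the PointwisePMI stats file from an output location (Path: outpath/PAIR_STATS_FILE)
outpath
- the output location containing PAIR_STATS_FILE
IOException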
public static Map<Long,Long> loadTimeCounts(org.apache.hadoop.fs.Path pairmiloc) throws IOException
pairmiloc
- a directory which contains PAIRMI_DIR/TIMEPERIOD_OUTPUT_NAME
IOException