Hadoop MapReduce case study: computing mobile phone uplink, downlink, and total traffic

Keywords: Programming Hadoop Apache Java Mobile

The log format is as follows. The fields we need: the second column is the mobile phone number (the user), the third-to-last column is the uplink traffic, and the second-to-last column is the downlink traffic.
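For concreteness, here is a hypothetical log line in that layout (tab-separated; every value below is invented for illustration):

1363157985066	13726230503	120.196.100.82	www.example.com	2481	24681	200

The second column (13726230503) is the phone number, the third-to-last column (2481) is the uplink traffic, and the second-to-last column (24681) is the downlink traffic.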

Approach:

Encapsulate the uplink traffic, downlink traffic, and total traffic in a bean object. The map emits context.write(phone number, bean), and the reduce aggregates each user's traffic; a sketch of that aggregation step follows.
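A minimal sketch of that aggregation step, assuming the FlowBean class defined below and the same imports as the full job at the end of this article (the full code here implements the subsequent sort step instead):

    // Sketch: sums each phone's traffic; assumes the map emitted (phone number, FlowBean)
    public static class SumReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
        @Override
        public void reduce(Text phone, Iterable<FlowBean> beans, Context context)
                throws IOException, InterruptedException {
            long up = 0;
            long down = 0;
            for (FlowBean b : beans) {
                up += b.getUpFlow();
                down += b.getDownFlow();
            }
            // The two-argument constructor computes the total itself
            context.write(phone, new FlowBean(up, down));
        }
    }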

Passing a custom Bean type through MapReduce:

(1) To be transmitted between Hadoop nodes, the bean must use Hadoop's serialization mechanism: implement the Writable interface and override two methods: write(DataOutput), which serializes the bean into the transport stream, and readFields(DataInput), which restores the bean from the transport stream.

(2) Hadoop's serialization mechanism differs from the native JDK's: it transfers only the bean's own fields, not the class inheritance structure, which keeps the transmitted data free of redundancy.

(3) Serialization and deserialization use reflection under the hood, so a no-argument constructor is required.

(4) To implement custom sorting, the bean must implement the WritableComparable<T> interface. Note: implementing Writable and Comparable<T> separately is not a substitute; that combination throws an initialization exception.

Define a FlowBean class that implements the WritableComparable interface:

import org.apache.hadoop.io.WritableComparable;
 
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
 
/**
 * @Description Traffic bean
 * @Date 10-17-2018
 * @ClassName: FlowBean
 */
 
public class FlowBean implements WritableComparable<FlowBean> {
 
    private long upFlow;   // Uplink traffic
    private long downFlow; // Downlink traffic
    private long sumFlow;  // Total traffic
 
    // Deserialization calls the no-argument constructor via reflection
    public FlowBean() {
        super();
    }
 
    public FlowBean(long upFlow, long downFlow) {
        super();
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }
 
    public long getUpFlow() {
        return upFlow;
    }
 
    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }
 
    public long getDownFlow() {
        return downFlow;
    }
 
    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }
 
    public long getSumFlow() {
        return sumFlow;
    }
 
    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    /*
     * Serialization: write the fields we want to transmit into the byte stream
     *
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
 
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }
 
    /**
     * Deserialization: recover the fields from the byte stream
     *
     * The read order must match the write order
     */
 
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }
 
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }
 
    @Override
    public int compareTo(FlowBean fb) {
        // Sort in descending order of total traffic:
        // returning -1 keeps this bean ahead of fb, 1 moves it behind.
        // Equal totals never compare as 0, so beans with the same total
        // are still treated as distinct keys.
        return this.sumFlow > fb.getSumFlow() ? -1 : 1;
    }
 
}
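To see the Writable contract in action outside of a job, here is a small throwaway test (a sketch; the class name FlowBeanDemo is my own, not part of the original article) that round-trips a FlowBean through plain JDK streams and checks the descending sort:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class FlowBeanDemo {
    public static void main(String[] args) throws IOException {
        // Round trip: write() serializes three longs, readFields() reads
        // them back in the same order
        FlowBean original = new FlowBean(1024, 2048);
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buf));

        FlowBean restored = new FlowBean(); // no-arg constructor, as reflection would use it
        restored.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
        System.out.println(restored); // 1024  2048  3072

        // compareTo orders beans by total traffic, largest first
        List<FlowBean> beans = new ArrayList<>();
        beans.add(new FlowBean(10, 20));
        beans.add(new FlowBean(500, 500));
        beans.add(new FlowBean(1, 1));
        Collections.sort(beans);
        System.out.println(beans.get(0)); // 500  500  1000
    }
}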

MapReduce code:

/**
 * @Description: Compute each user's uplink, downlink, and total traffic, and sort by total traffic in descending order.
 * @Date: 2018-10-17
 * @ClassName: Flow_log
 */
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 
import java.io.IOException;
 
public class Flow_log {
 
    // Map: parse each log line and emit (FlowBean, phone number)
    public static class MyMapper extends Mapper<LongWritable, Text, FlowBean, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] line = value.toString().split("\t");
            String phone = line[1];                         // second column: phone number
            int size = line.length;
            long upRate = Long.parseLong(line[size - 3]);   // third-to-last column: uplink traffic
            long downRate = Long.parseLong(line[size - 2]); // second-to-last column: downlink traffic
            context.write(new FlowBean(upRate, downRate), new Text(phone));
        }
    }
 
    // Reduce

    /**
     * Hadoop's default sort algorithm sorts keys only,
     * so we swap the positions of Text and FlowBean
     * to make the shuffle sort on the FlowBean,
     * whose compareTo method is overridden above.
     */
    public static class MyReducer extends Reducer<FlowBean, Text, Text, FlowBean> {
        @Override
        public void reduce(FlowBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // compareTo never returns 0, so each record normally arrives in
            // its own reduce call; loop anyway so that no phone number is
            // dropped if several of them share an identical FlowBean key
            for (Text phone : values) {
                context.write(phone, key);
            }
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
 
        Job job = Job.getInstance(conf, "Flow_log");
        job.setJarByClass(Flow_log.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
 
        //Specify the k, v type of mapper output data
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);
 
        //Specify the k, v type of the final output data
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);
 
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
 
}
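The job takes the input path and the output path as its two arguments. With the hypothetical log line shown earlier, the output would contain one line per record: the phone number, then FlowBean.toString() (uplink, downlink, total, tab-separated), ordered by total traffic in descending order:

13726230503	2481	24681	27162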

Original text: https://blog.csdn.net/qq_40309183/article/details/83111495
