实验八项目案例-电商数据分析

admin • 2021-12-05 21:36 • 云计算

电商大数据离线计算

第1关：统计用户流失情况
第2关：统计所有商品点击量排行
第3关：统计各个商品类别中点击量最高的商品
第4关：统计五种商品类别占比
第5关：统计各类商品种类的购买次数
第6关：统计五类商品中各自点击量最高的商品的购买次数

第1关：统计用户流失情况

任务描述

本关任务：根据用户行为数据，编写 MapReduce 程序来统计出用户流失情况。

编程要求

根据提示，在右侧编辑器补充代码，计算得出用户流失情况。

main 方法已给出，其中 Job 和输入输出路径已配置完成，无需更改；
map 和 reduce 的输入输出 key、value 已给出；
编程中直接写 map 与 reduce 过程的主要内容即可。

预期输出格式:

buy,总数
cart,总数
fav,总数
pv,总数

测试说明

平台会对你编写的代码进行测试，如果编写的 MapReduce 输出与预期一致，则通过。

注：出于显示原因，网页端的 mapreduce 的输出结果中制表符统一用逗号代替显示，但在实际 reduce 结果中 keyvalue 仍是原样制表符分割，这只是显示上的变化，不影响编程与评测结果。

开始你的任务吧，祝你成功！

代码实现

package educoder;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce driver that counts how many input records exist for each user behavior.
 *
 * <p>The mapper splits every line on commas and uses the fourth field as the behavior
 * name; the reducer adds up the occurrences of each behavior.
 */
public class UserLoseDriver {

    /** Emits {@code (behavior, 1)} for every input line. */
    public static class ThisMap extends Mapper<Object, Text, Text, IntWritable> {
        // Constant count of one, shared by every emitted pair.
        private static IntWritable one = new IntWritable(1);

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            /********** Begin **********/
            // Field index 3 of the comma-separated line is the behavior.
            String[] fields = value.toString().split(",");
            Text behaviorKey = new Text(fields[3]);
            context.write(behaviorKey, one);
            /********** End **********/
        }
    }

    /** Sums the counts received for one behavior and writes {@code (behavior, total)}. */
    public static class ThisReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            /********** Begin **********/
            int total = 0;
            for (IntWritable count : values) {
                total = total + count.get();
            }
            IntWritable result = new IntWritable(total);
            context.write(key, result);
            /********** End **********/
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created or run
     */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "用户流失情况查询");
        job.setJarByClass(UserLoseDriver.class);

        // Map phase: Text key, IntWritable value.
        job.setMapperClass(ThisMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce phase: Text key, IntWritable value.
        job.setReducerClass(ThisReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Path inputPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inputPath);
        Path outputPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

第2关：统计所有商品点击量排行

任务描述

本关任务：根据用户行为数据，编写 MapReduce 程序来统计出商品点击量排行。

编程要求

根据提示，在右侧编辑器补充代码，计算得出商品点击量排行。

main 方法已给出，其中 Job 和输入输出路径已配置完成，无需更改；
map 和 reduce 的输入输出 key、value 已给出；
编程中直接写 map 与 reduce 过程的主要内容即可。

预期输出格式（按点击量从大到小）:

商品id,点击量
商品id,点击量
···
···

测试说明

平台会对你编写的代码进行测试，如果编写的 MapReduce 输出与预期一致，则通过。

开始你的任务吧，祝你成功！

代码实现

package educoder;

import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce driver that ranks items by click count, highest first.
 *
 * <p>Only input rows whose behavior field equals {@code "pv"} are counted. The reducer
 * buffers every (item, count) pair in memory and writes them out, sorted, from
 * {@code cleanup}; the ordering therefore only spans the keys handled by one reducer
 * instance.
 */
public class ItemClickRankDriver {

    /** Emits {@code (itemId, 1)} for every input line whose behavior is {@code "pv"}. */
    public static class ThisMap extends Mapper<Object, Text, Text, IntWritable> {
        // Constant count of one, shared by every emitted pair.
        private static IntWritable one = new IntWritable(1);

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            /********** Begin **********/
            // Comma-separated line: index 1 is the item id, index 3 the behavior.
            String[] atts = value.toString().split(",");
            String item = atts[1];
            String behavior = atts[3];
            // Only "pv" rows count as clicks.
            if (behavior.equals("pv")) {
                context.write(new Text(item), one);
            }
            /********** End **********/
        }
    }

    /** Totals the clicks per item and emits all items in descending click order at the end. */
    public static class ThisReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // One {itemId (String), clickCount (Integer)} pair per reduce() call.
        List<Object[]> list = new LinkedList<>();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            /********** Begin **********/
            // Nothing is written here; the pair is buffered until cleanup() can sort everything.
            int sum = 0;
            for (IntWritable one : values) {
                sum += one.get();
            }
            list.add(new Object[] { key.toString(), Integer.valueOf(sum) });
            /********** End **********/
        }

        /**
         * Runs after the last {@code reduce} call: sorts the buffered pairs and writes them
         * from the largest count to the smallest.
         */
        @Override
        protected void cleanup(Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Sort ascending by count. Integer.compare replaces subtraction so the
            // comparison can never overflow.
            list = list.stream()
                    .sorted((left, right) -> Integer.compare((int) left[1], (int) right[1]))
                    .collect(Collectors.toList());
            // Walk the ascending list backwards so the largest counts are written first.
            for (int i = list.size() - 1; i >= 0; i--) {
                Object[] pair = list.get(i);
                context.write(new Text((String) pair[0]), new IntWritable((int) pair[1]));
            }
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created or run
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "商品点击量排行");

        job.setJarByClass(ItemClickRankDriver.class);
        job.setMapperClass(ThisMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(ThisReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

第3关：统计各个商品类别中点击量最高的商品

任务描述

本关任务：根据用户行为数据，编写 MapReduce 程序来统计各个商品类别中点击量最高的商品。

编程要求

根据提示，在右侧编辑器补充代码，计算得出各个商品类别中点击量最高的商品。

main 方法已给出，其中 Job 和输入输出路径已配置完成，无需更改；
map 和 reduce 的输入输出 key、value 已给出；
编程中直接写 map 与 reduce 过程的主要内容即可。

预期输出格式:

商品类型,点击量最高的商品id
商品类型,点击量最高的商品id
···

测试说明
平台会对你编写的代码进行测试，如果编写的 MapReduce 输出与预期一致，则通过。

注：出于显示原因，网页端的 mapreduce 的输出结果中制表符统一用逗号代替显示，但在实际 reduce 结果中 keyvalue 仍是原样制表符分割，这只是显示上的变化，不影响编程与评测结果

开始你的任务吧，祝你成功！

代码实现

package educoder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce driver that finds, for each item category, the item with the most clicks.
 *
 * <p>Only input rows whose behavior field equals {@code "pv"} are considered. The mapper
 * groups item ids by category; the reducer counts how often each item id occurs within
 * its category and emits the most frequent one.
 */
public class ItemClickTopOneEachTypeDriver {

    /** Emits {@code (category, itemId)} for every input line whose behavior is {@code "pv"}. */
    public static class ThisMap extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            /********** Begin **********/
            // Comma-separated line: index 1 = item id, index 2 = category, index 3 = behavior.
            String[] atts = value.toString().split(",");
            String item = atts[1];
            String type = atts[2];
            String behavior = atts[3];
            if (behavior.equals("pv")) {
                context.write(new Text(type), new Text(item));
            }
            /********** End **********/
        }
    }

    /** Emits {@code (category, itemId)} for the item id seen most often in the category. */
    public static class ThisReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            /********** Begin **********/
            // 1. Count the occurrences of each item id within this category.
            Map<String, Integer> clicksPerItem = new HashMap<>();
            for (Text value : values) {
                clicksPerItem.merge(value.toString(), 1, Integer::sum);
            }
            // 2. Pick the entry with the largest count. comparingByValue() compares the
            //    Integer counts directly instead of subtracting them. On a tie the first
            //    maximum in the map's iteration order wins.
            Map.Entry<String, Integer> itemMax =
                    Collections.max(clicksPerItem.entrySet(), Map.Entry.comparingByValue());
            // 3. Write the winning item id for this category.
            context.write(key, new Text(itemMax.getKey()));
            /********** End **********/
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created or run
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "各个商品类别中点击量最高的商品");

        job.setJarByClass(ItemClickTopOneEachTypeDriver.class);
        job.setMapperClass(ThisMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(ThisReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

第4关：统计五种商品类别占比

任务描述

本关任务：根据用户行为数据，编写 MapReduce 程序来统计出五种商品分类占比数据。

编程要求

根据提示，在右侧编辑器补充代码，计算得出五种商品分类占比数据。

main 方法已给出，其中 Job 和输入输出路径已配置完成，无需更改；
map 和 reduce 的输入输出 key、value 已给出；
编程中直接写 map 与 reduce 过程的主要内容即可。

预期输出格式:

商品类别,占总数比例
商品类别,占总数比例
···

测试说明

平台会对你编写的代码进行测试，如果编写的 MapReduce 输出与预期一致，则通过。

开始你的任务吧，祝你成功！

代码实现

package educoder;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce driver that computes each item category's share of all input records.
 *
 * <p>The mapper emits one count per line keyed by category. The reducer stores the total
 * per category and, in {@code cleanup}, divides each total by the sum of all stored
 * totals, so the ratios are relative to the categories seen by one reducer instance.
 */
public class ItemTypeRatioDriver {

    /** Emits {@code (category, 1)} for every input line. */
    public static class ThisMap extends Mapper<Object, Text, Text, IntWritable> {
        // Constant count of one, shared by every emitted pair.
        private static IntWritable one = new IntWritable(1);

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            /********** Begin **********/
            // Field index 2 of the comma-separated line is the category.
            String[] fields = value.toString().split(",");
            Text categoryKey = new Text(fields[2]);
            context.write(categoryKey, one);
            /********** End **********/
        }
    }

    /** Collects per-category totals, then writes {@code (category, ratio)} pairs at the end. */
    public static class ThisReduce extends Reducer<Text, IntWritable, Text, DoubleWritable> {
        // Category name -> number of records, filled by reduce() and read by cleanup().
        Map<String,Integer> map = new HashMap<>();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            /********** Begin **********/
            // Nothing is written here; the total is stored until every category is known.
            int categoryTotal = 0;
            for (IntWritable occurrence : values) {
                categoryTotal = categoryTotal + occurrence.get();
            }
            map.put(key.toString(), categoryTotal);
            /********** End **********/
        }

        /**
         * Runs after the last {@code reduce} call: computes the grand total and writes each
         * category's fraction of it.
         */
        @Override
        protected void cleanup(Reducer<Text, IntWritable, Text, DoubleWritable>.Context context)
                throws IOException, InterruptedException {
            // Grand total across all stored categories.
            int grandTotal = 0;
            for (Integer categoryTotal : map.values()) {
                grandTotal += categoryTotal;
            }
            // Each category's share of the grand total.
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                int categoryTotal = entry.getValue();
                double share = ((double) categoryTotal) / grandTotal;
                context.write(new Text(entry.getKey()), new DoubleWritable(share));
            }
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created or run
     */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "五种商品分类占比");
        job.setJarByClass(ItemTypeRatioDriver.class);

        // Map phase: Text key, IntWritable value.
        job.setMapperClass(ThisMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce phase: Text key, DoubleWritable value.
        job.setReducerClass(ThisReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        Path inputPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inputPath);
        Path outputPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

第5关：统计各类商品种类的购买次数

任务描述

本关任务：根据用户行为数据，编写 MapReduce 程序来统计出各类商品种类的购买次数。

编程要求

根据提示，在右侧编辑器补充代码，计算得出各类商品种类的购买次数。

main 方法已给出，其中 Job 和输入输出路径已配置完成，无需更改；
map 和 reduce 的输入输出 key、value 已给出；
编程中直接写 map 与 reduce 过程的主要内容即可。

预期输出格式:

商品类型,购买次数
商品类型,购买次数
···

测试说明

平台会对你编写的代码进行测试，如果编写的 MapReduce 输出与预期一致，则通过。

开始你的任务吧，祝你成功！

代码实现

package educoder;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce driver that counts the number of purchases in each item category.
 *
 * <p>Only input rows whose behavior field equals {@code "buy"} are counted.
 */
public class ItemTypeBuyCountDriver {

    /** Emits {@code (category, 1)} for every input line whose behavior is {@code "buy"}. */
    public static class ThisMap extends Mapper<Object, Text, Text, IntWritable> {
        // Constant count of one, shared by every emitted pair.
        private static IntWritable one = new IntWritable(1);

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            /********** Begin **********/
            // Comma-separated line: index 2 is the category, index 3 the behavior.
            String[] fields = value.toString().split(",");
            String category = fields[2];
            boolean isPurchase = fields[3].equals("buy");
            if (isPurchase) {
                context.write(new Text(category), one);
            }
            /********** End **********/
        }
    }

    /** Sums the counts received for one category and writes {@code (category, total)}. */
    public static class ThisReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            /********** Begin **********/
            int purchases = 0;
            for (IntWritable occurrence : values) {
                purchases = purchases + occurrence.get();
            }
            IntWritable result = new IntWritable(purchases);
            context.write(key, result);
            /********** End **********/
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created or run
     */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "各类商品总数的购买次数");
        job.setJarByClass(ItemTypeBuyCountDriver.class);

        // Map phase: Text key, IntWritable value.
        job.setMapperClass(ThisMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce phase: Text key, IntWritable value.
        job.setReducerClass(ThisReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Path inputPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inputPath);
        Path outputPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

第6关：统计五类商品中各自点击量最高的商品的购买次数

任务描述

本关任务：根据用户行为数据，编写 MapReduce 程序来统计出五类商品中各自点击量最高的商品的购买次数。

编程要求

根据提示，在右侧编辑器补充代码，计算得出五类商品中各自点击量最高的商品的购买次数。

main 方法已给出，其中 Job 和输入输出路径已配置完成，无需更改；
map 和 reduce 的输入输出 key、value 已给出；
编程中直接写 map 与 reduce 过程的主要内容即可。

预期输出格式:

商品类型,本类型中点击量最高的id,购买次数
商品类型,本类型中点击量最高的id,购买次数
···

测试说明

平台会对你编写的代码进行测试，如果编写的 MapReduce 输出与预期一致，则通过。

开始你的任务吧，祝你成功！

代码实现

package educoder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce driver that reports, for each item category, the most frequent item and how
 * many times that item was bought.
 *
 * <p>The mapper forwards every input line keyed by category. The reducer picks the item
 * id that occurs most often in the category and counts its rows whose behavior is
 * {@code "buy"}. Each output value has the form {@code itemId<TAB>buyCount}.
 */
public class ItemMaxClickBuyCountDriver {

    /** Emits {@code (category, wholeLine)} for every input line. */
    public static class ThisMap extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            /********** Begin **********/
            // Field index 2 of the comma-separated line is the category.
            String[] atts = value.toString().split(",");
            String type = atts[2];
            // Forward the whole line: the reducer still needs the item id and behavior fields.
            context.write(new Text(type), value);
            /********** End **********/
        }
    }

    /** Emits {@code (category, "itemId\tbuyCount")} for the category's most frequent item. */
    public static class ThisReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            /********** Begin **********/
            // 1. The rows are needed twice (to rank items, then to count purchases),
            //    so copy them into a list first.
            List<String> rows = new ArrayList<>();
            for (Text v : values) {
                rows.add(v.toString());
            }
            // 2. Count the rows per item id.
            // NOTE(review): this counts rows of every behavior, not only "pv" rows;
            // confirm that this is the intended definition of "click volume".
            Map<String, Integer> rowsPerItem = new HashMap<>();
            for (String row : rows) {
                String item = row.split(",")[1];
                rowsPerItem.merge(item, 1, Integer::sum);
            }
            // 3. Pick the item with the largest count. comparingByValue() compares the
            //    Integer counts directly instead of subtracting them. On a tie the first
            //    maximum in the map's iteration order wins.
            String itemClickMax =
                    Collections.max(rowsPerItem.entrySet(), Map.Entry.comparingByValue()).getKey();
            // 4. Count that item's "buy" rows.
            int buyCount = 0;
            for (String row : rows) {
                String[] atts = row.split(",");
                if (atts[1].equals(itemClickMax) && atts[3].equals("buy")) {
                    buyCount++;
                }
            }
            // 5. Join item id and purchase count with a tab so the output has three columns
            //    (the original used the literal letter "t" as the separator).
            context.write(key, new Text(itemClickMax + "\t" + buyCount));
            /********** End **********/
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created or run
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "五个商品类别中点击量最高的商品的购买次数");

        job.setJarByClass(ItemMaxClickBuyCountDriver.class);
        job.setMapperClass(ThisMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(ThisReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

本图文内容来源于网友网络收集整理提供，作为学习参考使用，版权属于原作者。

THE END

mapreduce 大数据数据分析数据挖掘

二维码

Hadoop之HDFS

<<上一篇

架构师十项能力，对标阿里P8晋升要求，学习内容梳理

下一篇>>

实验八 项目案例-电商数据分析

电商大数据离线计算

第1关：统计用户流失情况

任务描述

相关知识

数据文件格式说明

用户流失情况

编程要求

测试说明

代码实现

第2关：统计所有商品点击量排行

任务描述

相关知识

数据文件格式说明

商品点击量排行

cleanup()方法

编程要求

测试说明

代码实现

第3关：统计各个商品类别中点击量最高的商品

任务描述

相关知识

数据文件格式说明

编程要求

代码实现

第4关：统计五种商品类别占比

任务描述

相关知识

数据文件格式说明

商品类别占比

cleanup()方法

编程要求

测试说明

代码实现

第5关：统计各类商品种类的购买次数

任务描述

相关知识

数据文件格式说明

编程要求

测试说明

代码实现

第6关：统计五类商品中各自点击量最高的商品的购买次数

任务描述

相关知识

数据文件格式说明

编程要求

测试说明

代码实现

最新文章

分类

标签云

实验八项目案例-电商数据分析