Spark统计一座城市的男女人数，以及男女消费额的最高与最低

``````
// Generates the input data file: one record per line, formatted as "<id> <sex> <cost>".
Random r = new Random();
// Population size in [100000, 1100000). The earlier expression
// (nextInt(1100000) - 100000) could yield zero or a negative number, which
// produced an empty file; presumably an offset of +100000 was intended.
int x = r.nextInt(1000000) + 100000;
// try-with-resources closes (and therefore flushes) the writer even if a write fails,
// so no per-record flush() is needed.
try (FileWriter fw = new FileWriter("生成文件的路径以及名称")) {
    for (int y = 1; y <= x; y++) {
        // Sex is a coin flip; cost is a uniform integer in [0, 10000).
        String sex = r.nextBoolean() ? "M" : "F";
        int cost = r.nextInt(10000);
        // Terminate every record with a real line break; the literal text "rn"
        // glued all records onto a single line.
        fw.write(y + " " + sex + " " + cost + "\r\n");
    }
}
``````

``````
// Computes the head count, the total spending and the per-capita spending of the city.
SparkConf sparkconf = new SparkConf().setAppName(" ").setMaster("local");
// JavaSparkContext is Closeable: try-with-resources stops the context when the job ends.
try (JavaSparkContext sc = new JavaSparkContext(sparkconf)) {
    JavaRDD<String> lines = sc.textFile("数据路径");
    // Keep only the cost column (third field). It is parsed as long because the
    // total over up to ~1,000,000 records of up to 9999 each does not fit in an int.
    JavaRDD<Long> rdd1 = lines.map(f -> Long.valueOf(f.split(" ")[2]));
    // Two actions (count and fold) run on this RDD; cache it so the file is parsed once.
    rdd1.cache();

    long counts = rdd1.count(); // number of people in the city
    // fold() with a zero value also works on an empty RDD, where reduce() would throw.
    long sum = rdd1.fold(0L, (a, b) -> a + b); // total spending
    // Per-capita spending = total / head count (integer division); 0 when there is no data.
    long avg = counts == 0 ? 0 : sum / counts;

    // Output
    System.out.println("这座城市一共有：" + counts + "人");
    System.out.println("这座城市的消费总额为：" + sum + "元");
    System.out.println("这座城市的人均消费额为：" + avg + "元");
}
``````

``````
// Reports, per sex, the lowest and highest spending and the number of people.
SparkConf sparkconf = new SparkConf().setAppName(" ").setMaster("local");
// JavaSparkContext is Closeable: try-with-resources stops the context when the job ends.
try (JavaSparkContext sc = new JavaSparkContext(sparkconf)) {
    JavaRDD<String> lines = sc.textFile("数据路径");
    // Build (sex, cost) pairs; each line is split only once.
    JavaPairRDD<String, Integer> rdd1 = lines.mapToPair(f -> {
        String[] fields = f.split(" ");
        return new Tuple2<>(fields[1], Integer.valueOf(fields[2]));
    });
    // Six actions below are derived from this RDD; cache it so the file is parsed once.
    rdd1.cache();

    // Keep only the male records.
    JavaPairRDD<String, Integer> rddM1 = rdd1.filter(f -> "M".equals(f._1()));
    // Keep only the female records.
    JavaPairRDD<String, Integer> rddF1 = rdd1.filter(f -> "F".equals(f._1()));

    // Reduce the costs per key, keeping the larger (MAX) or the smaller (MIN) value.
    JavaPairRDD<String, Integer> MAXM = rddM1.reduceByKey((x, y) -> MAX(x, y));
    JavaPairRDD<String, Integer> MAXF = rddF1.reduceByKey((x, y) -> MAX(x, y));
    JavaPairRDD<String, Integer> MINM = rddM1.reduceByKey((x, y) -> MIN(x, y));
    JavaPairRDD<String, Integer> MINF = rddF1.reduceByKey((x, y) -> MIN(x, y));

    // Collect the (at most one-element) results to the driver before printing:
    // RDD.foreach runs on the executors, so its output only reaches this console in local mode.
    MINF.collect().forEach(x -> System.out.println("女性最低消费" + x._2));
    MINM.collect().forEach(x -> System.out.println("男性最低消费" + x._2));
    MAXF.collect().forEach(x -> System.out.println("女性最高消费" + x._2));
    MAXM.collect().forEach(x -> System.out.println("男性最高消费" + x._2));

    long countsM = rddM1.count(); // number of men
    long countsF = rddF1.count(); // number of women
    // A real line break separates the two counts (the literal "n" was a lost escape).
    System.out.println("男性人数为：" + countsM + "\n" + "女性人数为：" + countsF);
}
``````

``````
/**
 * Returns the larger of two integers.
 *
 * @param x first value
 * @param y second value
 * @return {@code y} when {@code x < y}, otherwise {@code x}
 */
public static int MAX(int x,int y) {
    return (x < y) ? y : x;
}
/**
 * Returns the smaller of two integers.
 *
 * @param x first value
 * @param y second value
 * @return {@code x} when {@code x < y}, otherwise {@code y}
 */
public static int MIN(int x,int y) {
    return (x < y) ? x : y;
}
``````

THE END