Spark _13_ Secondary Sort
Java API (secondary sort via a custom comparable key object):
Data:

3 1
5 2
6 5
8 123
1 4
4 123
5 432
3 54
5 121
8 654
3 98

Code:
package ddd;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

/**
 * @author George
 * @description Secondary sort: order lines by the first column, breaking ties with the second.
 */
public class SecondSort {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("ss").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> tf = sc.textFile("./data/secondSort.txt");
        // Wrap both columns in a comparable key; keep the original line as the value.
        JavaPairRDD<SecondSortKey, String> map = tf.mapToPair(new PairFunction<String, SecondSortKey, String>() {
            @Override
            public Tuple2<SecondSortKey, String> call(String s) throws Exception {
                String[] strings = s.split(" ");
                int first = Integer.parseInt(strings[0]);
                int second = Integer.parseInt(strings[1]);
                SecondSortKey secondSortKey = new SecondSortKey(first, second);
                return new Tuple2<>(secondSortKey, s);
            }
        });
        // false = descending; print the original lines in sorted order.
        map.sortByKey(false).foreach(new VoidFunction<Tuple2<SecondSortKey, String>>() {
            @Override
            public void call(Tuple2<SecondSortKey, String> tuple) throws Exception {
                System.out.println(tuple._2);
            }
        });
    }
}

************************************************************************

package ddd;

import java.io.Serializable;

/**
 * @author George
 * @description Composite key: compares by first, then by second.
 *              Must be Serializable because Spark ships keys across the cluster.
 */
public class SecondSortKey implements Serializable, Comparable<SecondSortKey> {
    private int first;
    private int second;

    public SecondSortKey() {
    }

    public SecondSortKey(int first, int second) {
        this.first = first;
        this.second = second;
    }

    public int getFirst() {
        return first;
    }

    public void setFirst(int first) {
        this.first = first;
    }

    public int getSecond() {
        return second;
    }

    public void setSecond(int second) {
        this.second = second;
    }

    @Override
    public int compareTo(SecondSortKey o) {
        // Compare by the first column; fall back to the second on ties.
        // (Subtraction can overflow for extreme int values; Integer.compare is safer in general.)
        if (this.getFirst() - o.getFirst() == 0) {
            return this.getSecond() - o.getSecond();
        } else {
            return this.getFirst() - o.getFirst();
        }
    }
}

Result:

8 654
8 123
6 5
5 432
5 121
5 2
4 123
3 98
3 54
3 1
1 4
Scala API:
package suanzi

import org.apache.spark.{SparkConf, SparkContext}

object SecondSort {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("SecondSort")
    val sc = new SparkContext(sparkConf)
    sc.setLogLevel("error")
    val rdd = sc.textFile("./data/secondSort.txt")
    // Build a comparable composite key from the two columns.
    val mapRdd = rdd.map(x => {
      (new SecondSortKey(x.split(" ")(0).toInt, x.split(" ")(1).toInt), null)
    })
    // mapRdd.foreach(println)
    val sortRdd = mapRdd.sortByKey(false) // false = descending
    sortRdd.map(_._1).foreach(println)
    /**
     * 8 654
     * 8 123
     * 6 5
     * 5 432
     * 5 121
     * 5 2
     * 4 123
     * 3 98
     * 3 54
     * 3 1
     * 1 4
     */
    sc.stop()
  }
}

// Extending Ordered supplies the implicit Ordering that sortByKey needs.
class SecondSortKey(val first: Int, val second: Int) extends Ordered[SecondSortKey] with Serializable {
  override def compare(that: SecondSortKey): Int = {
    if (this.first - that.first == 0) {
      this.second - that.second
    } else {
      this.first - that.first
    }
  }

  override def toString: String = this.first + " " + this.second
}
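As an aside, for plain numeric pairs the custom key class is not strictly necessary: RDD.sortBy accepts any key type that has an implicit Ordering, and Scala already provides one for tuples (first element, then second). Below is a minimal sketch of that variant, assuming the same input file; SecondSortTuple is a hypothetical name, not part of the original code.

package suanzi

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical variant: secondary sort via the built-in tuple Ordering,
// no custom key class required. Assumes ./data/secondSort.txt as above.
object SecondSortTuple {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("SecondSortTuple"))
    sc.setLogLevel("error")
    sc.textFile("./data/secondSort.txt")
      .map { line =>
        val cols = line.split(" ")
        (cols(0).toInt, cols(1).toInt) // (first, second) is the sort key
      }
      .sortBy(identity, ascending = false) // tuple Ordering: first column, then second
      .foreach { case (f, s) => println(f + " " + s) }
    sc.stop()
  }
}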