Commit 8e514388 authored by Sigmund Augdal's avatar Sigmund Augdal

Implemented the find top ports sample in scala

parent 3e8434ea
package no.uninett
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext._
/**
* Hello world!
......@@ -8,12 +9,16 @@ import org.apache.spark.SparkConf
*/
object App
{
val DestPort = 6
def main(args: Array[String]):Unit={
val appName = "scala test"
val conf = new SparkConf().setAppName(appName).setMaster("local")
val conf = new SparkConf().setAppName(appName)
val sc = new org.apache.spark.SparkContext(conf)
val data = Array(1, 2, 3, 4, 5)
val distData = sc.parallelize(data)
distData.map(x => x + 1).reduce((x, y) => x + y)
val lines = sc.textFile("hdfs://daas/daas_flows/trd-gw-2014-05-03.csv")
val csv = lines.map(x => x.split(","))
val ports = csv.map(x => x(DestPort))
val port_count = ports.map(x => (x, 1)).reduceByKey((x, y) => x + y)
val tops = port_count.map(x => (x._2, x._1)).sortByKey(false).take(10)
tops.foreach(println)
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment