Commit 7566712b authored by Gurvinder Singh

added missing java code

parent d91267bf
<name>Simple Project</name>
<id>Akka repository</id>
<dependency> <!-- Spark dependency -->
import scala.Tuple2;
import java.util.List;
class Split implements Function<String, String[]> {
public String[] call(String s) {
return s.split(",");
class MKTuple implements PairFunction<String, String, Integer> {
public Tuple2<String, Integer> call(String x) {
return new Tuple2<String, Integer>(x, 1);
class Add implements Function2<Integer, Integer, Integer> {
public Integer call(Integer a, Integer b) {
return a + b;
class Swap implements PairFunction<Tuple2<String, Integer>, Integer, String> {
public Tuple2<Integer, String> call(Tuple2<String, Integer> in) {
return new Tuple2<Integer, String>(in._2, in._1);
class GetPort implements Function<String[], String> {
public String call(String[] a) {
return a[6];
public class SimpleApp {
public static void main(String[] args) {
String logFile = "hdfs://daas/daas_flows/trd-gw-2014-05-03.csv";
JavaSparkContext sc = new JavaSparkContext();
JavaRDD<String> logData = sc.textFile(logFile);
JavaRDD<String[]> csv = Split());
public static void topPorts(JavaRDD<String[]> csv) {
JavaRDD<String> ports = GetPort());
JavaPairRDD<String, Integer> pairs = ports.mapToPair(new MKTuple());
JavaPairRDD<String, Integer> port_count = pairs.reduceByKey(new Add());
JavaPairRDD<Integer, String> invpairs = port_count.mapToPair(new Swap());
JavaPairRDD<Integer, String> sorted = invpairs.sortByKey(false);
List<Tuple2<Integer, String>> tops = sorted.take(15);
for (int i = 0; i < 15; ++i) {
System.out.println(String.format(" %s %s", tops.get(i)._1, tops.get(i)._2));
#Generated by Maven
#Thu May 14 19:59:09 CEST 2015
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment