From 3e8434eafe058759624af699b0aaa4bc37d6f32f Mon Sep 17 00:00:00 2001
From: Sigmund Augdal
Date: Mon, 7 Jul 2014 17:26:36 +0200
Subject: [PATCH] Replace two maps and a union with a flatMap. Seems to be faster

---
Reviewer notes (this section is ignored by `git am`):

 * ports_count_by_ip previously built two mapped datasets, `srcs` keyed by
   (DEST_PORT, SRC_IP) and `dsts` keyed by (DEST_PORT, DEST_IP), unioned
   them and then called reduceByKey(add).
 * After this patch a single flatMap lambda returns a 2-tuple per record
   holding both ((DEST_PORT, DEST_IP), 1) and ((DEST_PORT, SRC_IP), 1), and
   reduceByKey(add) is applied to that flattened result. The same two
   key/value pairs are produced for every record as before.
 * The three replaced statements are left in the file as `#` comments
   rather than being deleted.
 * "Seems to be faster" is the author's observation; this patch contains no
   measurement to back it up.
 * NOTE(review): the line breaks and indentation of this copy were
   reconstructed from a whitespace-collapsed source. Verify against commit
   3e8434e before applying.

 pythonApp/netflowAlgs.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pythonApp/netflowAlgs.py b/pythonApp/netflowAlgs.py
index 891f2d5..5a40467 100644
--- a/pythonApp/netflowAlgs.py
+++ b/pythonApp/netflowAlgs.py
@@ -31,7 +31,9 @@ def ports_count_by_ip3(csv):
 
 
 def ports_count_by_ip(csv):
-    srcs = csv.map(lambda x: ((x[DEST_PORT], x[SRC_IP]), 1))
-    dsts = csv.map(lambda x: ((x[DEST_PORT], x[DEST_IP]), 1))
-    ips = srcs.union(dsts).reduceByKey(add)
+#    srcs = csv.map(lambda x: ((x[DEST_PORT], x[SRC_IP]), 1))
+#    dsts = csv.map(lambda x: ((x[DEST_PORT], x[DEST_IP]), 1))
+#    ips = srcs.union(dsts).reduceByKey(add)
+    ips = csv.flatMap(lambda x: (((x[DEST_PORT], x[DEST_IP]), 1),
+                                 ((x[DEST_PORT], x[SRC_IP]), 1))).reduceByKey(add)
     return ips.map(lambda x: (x[1], x[0])).sortByKey(False).take(20)
-- 
GitLab