Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Gurvinder Singh
spark_apps
Commits
1c7937c3
Commit
1c7937c3
authored
Jul 04, 2014
by
Gurvinder Singh
Browse files
added support for reading lzo files
parent
cd8890ca
Changes
2
Hide whitespace changes
Inline
Side-by-side
pythonApp/netflowAlgs.py
View file @
1c7937c3
...
...
@@ -26,8 +26,8 @@ def top_ports(csv, num=10):
def
ports_count_by_ip3
(
csv
):
ips
=
csv
.
map
(
lambda
x
:
((
x
[
DEST_PORT
],
x
[
SRC_IP
],
x
[
DEST_IP
]),
1
))
ip_count
=
ips
.
reduceByKey
(
add
,
numPartitions
=
3
0
)
return
ip_count
.
map
(
lambda
x
:
(
x
[
1
],
x
[
0
])).
sortByKey
(
False
,
numPartitions
=
3
0
).
take
(
20
)
ip_count
=
ips
.
reduceByKey
(
add
,
numPartitions
=
12
0
)
return
ip_count
.
map
(
lambda
x
:
(
x
[
1
],
x
[
0
])).
sortByKey
(
False
,
numPartitions
=
12
0
).
take
(
20
)
def
ports_count_by_ip
(
csv
):
...
...
pythonApp/netflowTest.py
View file @
1c7937c3
from
pyspark.conf
import
SparkConf
from
pyspark
import
SparkContext
import
argparse
import
os
from
netflowAlgs
import
top_ips
,
top_ports
,
ports_count_by_ip
,
ports_count_by_ip3
DESCRIPTION
=
"Analyze netflow data"
conf
=
SparkConf
()
conf
.
setAppName
(
"Netflow test"
).
set
(
"spark.executor.memory"
,
"2g"
)
#conf.setAppName("Netflow test").set("spark.executor.memory", "12g").set("spark.mesos.coarse","true")
conf
.
setAppName
(
"Netflow test"
).
set
(
"spark.executor.memory"
,
"12g"
)
sc
=
SparkContext
(
conf
=
conf
)
...
...
@@ -22,8 +24,12 @@ def parse_args():
return
parser
.
parse_args
()
opts
=
parse_args
()
fname
,
fext
=
os
.
path
.
splitext
(
opts
.
input
)
csv
=
sc
.
textFile
(
opts
.
input
).
map
(
lambda
x
:
x
.
split
(
","
))
if
fext
==
".lzo"
:
csv
=
sc
.
newAPIHadoopFile
(
opts
.
input
,
"com.hadoop.mapreduce.LzoTextInputFormat"
,
"org.apache.hadoop.io.LongWritable"
,
"org.apache.hadoop.io.Text"
).
map
(
lambda
x
:
x
[
1
].
split
(
","
))
else
:
csv
=
sc
.
textFile
(
opts
.
input
).
map
(
lambda
x
:
x
.
split
(
","
))
if
opts
.
find_top_ports
:
print
"Finding top ports"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment