Commit c07c3512 authored by Gurvinder Singh's avatar Gurvinder Singh

fixed sql app

parent cef69991
......@@ -5,7 +5,7 @@ import argparse
DESCRIPTION = "Analyze netflow data"
conf = SparkConf()
conf.setAppName("Netflow test").set("spark.executor.memory", "1g").set("spark.default.parallelism", 15).set("spark.mesos.coarse", "true")
conf.setAppName("Netflow test").set("spark.executor.memory", "1g").set("spark.default.parallelism", 15).set("spark.mesos.coarse", "false")
sc = SparkContext(conf=conf)
......
......@@ -3,12 +3,13 @@ from pyspark.sql import SQLContext
sc = SparkContext(appName='testSQL')
sqlCtx = SQLContext(sc)
lines = sc.textFile("hdfs://daas/user/hdfs/trd_gw1_12_01_normalized.csv")
#lines = sc.textFile("hdfs://daas/user/hdfs/csv-old/trd_gw1_12_01_normalized.csv")
lines = sc.textFile("hdfs://daas/spark/test")
parts = lines.map(lambda l: l.split(","))
records = parts.map(lambda p: {"date": p[0], "src_ip": p[1], "dest_ip": p[2], "port": int(p[3])})
records = parts.map(lambda p: {"text": p[0], "val": int(p[1]), "val1": int(p[2])})
recordsTable = sqlCtx.inferSchema(records)
recordsTable.registerAsTable("records")
http = sqlCtx.sql("SELECT count(*) FROM records WHERE port <= 80)")
print(http)
http = sqlCtx.sql("SELECT text FROM records WHERE val1 >= 1 AND val1 <= 41")
text = http.map(lambda p: "Text: "+p.text)
print(text.collect())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment