Commit 6524f2c8 authored by Gurvinder Singh's avatar Gurvinder Singh

intial comming to sample apps

parent d68eef1f
<project>
<groupId>edu.berkeley</groupId>
<artifactId>simple-project</artifactId>
<modelVersion>4.0.0</modelVersion>
<name>Simple Project</name>
<packaging>jar</packaging>
<version>1.0</version>
<repositories>
<repository>
<id>Akka repository</id>
<url>http://repo.akka.io/releases</url>
</repository>
</repositories>
<dependencies>
<dependency> <!-- Spark dependency -->
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.3.0</version>
</dependency>
</dependencies>
</project>
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;
public class SimpleApp {
public static void main(String[] args) {
String logFile = "spark-env.sh"; // Should be some file on your system
JavaSparkContext sc = new JavaSparkContext("local", "Simple App",
new String[]{"target/simple-project-1.0.jar"});
JavaRDD<String> logData = sc.textFile(logFile).cache();
long numAs = logData.filter(new Function<String, Boolean>() {
public Boolean call(String s) { return s.contains("a"); }
}).count();
long numBs = logData.filter(new Function<String, Boolean>() {
public Boolean call(String s) { return s.contains("b"); }
}).count();
System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
}
}
from pyspark import SparkContext
sc = SparkContext(appName='test')
data = [1, 2, 3, 4, 5]
distData = sc.parallelize(data)
print(distData.reduce(lambda a, b: a + b))
from pyspark import SparkContext
from pyspark.sql import SQLContext
sc = SparkContext(appName='testSQL')
sqlCtx = SQLContext(sc)
lines = sc.textFile("hdfs://daas/user/hdfs/trd_gw1_12_01_normalized.csv")
parts = lines.map(lambda l: l.split(","))
records = parts.map(lambda p: {"date": p[0], "src_ip": p[1], "dest_ip": p[2], "port": int(p[3])})
recordsTable = sqlCtx.inferSchema(records)
recordsTable.registerAsTable("records")
http = sqlCtx.sql("SELECT count(*) FROM records WHERE port <= 80)")
print(http)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment