Gurvinder Singh / spark_apps / Commits

Commit 6524f2c8
authored Jun 27, 2014 by Gurvinder Singh

    intial comming to sample apps

parent d68eef1f

Changes: 4 files
javaApp/pom.xml (new file, 0 → 100644)
<project>
  <groupId>edu.berkeley</groupId>
  <artifactId>simple-project</artifactId>
  <modelVersion>4.0.0</modelVersion>
  <name>Simple Project</name>
  <packaging>jar</packaging>
  <version>1.0</version>
  <repositories>
    <repository>
      <id>Akka repository</id>
      <url>http://repo.akka.io/releases</url>
    </repository>
  </repositories>
  <dependencies>
    <dependency>
      <!-- Spark dependency -->
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>1.0.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.3.0</version>
    </dependency>
  </dependencies>
</project>
javaApp/src/main/java/SimpleApp.java (new file, 0 → 100644)
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;

public class SimpleApp {
    public static void main(String[] args) {
        String logFile = "spark-env.sh"; // Should be some file on your system
        JavaSparkContext sc = new JavaSparkContext("local", "Simple App",
                new String[]{"target/simple-project-1.0.jar"});
        JavaRDD<String> logData = sc.textFile(logFile).cache();

        // Count the lines containing the letter "a".
        long numAs = logData.filter(new Function<String, Boolean>() {
            public Boolean call(String s) { return s.contains("a"); }
        }).count();

        // Count the lines containing the letter "b".
        long numBs = logData.filter(new Function<String, Boolean>() {
            public Boolean call(String s) { return s.contains("b"); }
        }).count();

        System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
    }
}
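For comparison with the Python apps added in this same commit, here is a minimal PySpark sketch of the same filter-and-count logic. It is not part of the commit; the file name and the local master are assumptions carried over from the Java version.

from pyspark import SparkContext

# Hypothetical PySpark equivalent of SimpleApp.java (not in this commit):
# count lines containing "a" and "b" in spark-env.sh on a local master.
sc = SparkContext("local", "Simple App")
logData = sc.textFile("spark-env.sh").cache()

numAs = logData.filter(lambda s: "a" in s).count()
numBs = logData.filter(lambda s: "b" in s).count()

print("Lines with a: %d, lines with b: %d" % (numAs, numBs))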
pythonApp/sampleApp.py (new file, 0 → 100644)
from pyspark import SparkContext

sc = SparkContext(appName='test')

# Distribute a small list across the cluster and sum it with a reduce.
data = [1, 2, 3, 4, 5]
distData = sc.parallelize(data)
print(distData.reduce(lambda a, b: a + b))
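As a sanity check on what sampleApp.py prints, the same associative reduction can be reproduced locally without Spark; the use of functools.reduce below is an illustration, not part of the commit.

from functools import reduce

data = [1, 2, 3, 4, 5]
# Same reduction the Spark job performs across partitions:
# ((((1 + 2) + 3) + 4) + 5) = 15
print(reduce(lambda a, b: a + b, data))  # prints 15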
pythonApp/sqlApp.py (new file, 0 → 100644)
from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext(appName='testSQL')
sqlCtx = SQLContext(sc)

# Load a normalized flow-log CSV from HDFS and split each line into fields.
lines = sc.textFile("hdfs://daas/user/hdfs/trd_gw1_12_01_normalized.csv")
parts = lines.map(lambda l: l.split(","))
records = parts.map(lambda p: {"date": p[0], "src_ip": p[1],
                               "dest_ip": p[2], "port": int(p[3])})

# Infer a schema from the dicts and register the result as a SQL table.
recordsTable = sqlCtx.inferSchema(records)
recordsTable.registerAsTable("records")

# Count records on port 80 and below.
http = sqlCtx.sql("SELECT count(*) FROM records WHERE port <= 80")
print(http)
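In Spark 1.0, sqlCtx.sql returns a SchemaRDD, so print(http) shows the RDD object rather than the count itself. A sketch of how the result could be materialized on the driver, assuming the same sqlCtx and the "records" table registered above (this follow-up is not part of the commit):

# Hypothetical follow-up: pull the aggregate back to the driver instead of
# printing the SchemaRDD object.
rows = sqlCtx.sql("SELECT count(*) FROM records WHERE port <= 80").collect()
print(rows[0][0])  # number of records with port <= 80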