我尝试用Apache Spark执行简单的项目.这是我的代码SimpleApp.scala
/* SimpleApp.scala */
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
object SimpleApp {
def main(args: Array[String]) {
val logFile = "/home/hduser/spark-1.2.0-bin-hadoop2.4/README.md" // Should be some file on your system
// val conf = new SparkConf().setAppName("Simple Application")
val sc = new SparkContext("local", "Simple Job", "/home/hduser/spark-1.2.0-bin-hadoop2.4/")
val logData = sc.textFile(logFile, 2).cache()
val numAs = logData.filter(line => line.contains("hadoop")).count()
val numBs = logData.filter(line => line.contains("see")).count()
println("Lines with hadoop: %s, Lines with see: %s".format(numAs, numBs))
}
}
Run Code Online (Sandbox Code Playgroud)
当我使用命令行手动将此作业发送到Spark时:/home/hduser/spark-1.2.0-hadoop-2.4.0/bin/spark-submit --class "SimpleApp" --master local[4] target/scala-2.10/simple-project_2.10-1.0.jar …