使用java编写spark程序,代码如下:
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
public class SimpleApp {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("wordCount").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<String> input = sc.textFile("/bigdata/softwares/spark-2.1.0-bin-hadoop2.7/testdata/a.txt");
System.out.println();
Long bCount = input.filter(new Function<String,Boolean>(){
public Boolean call(String s){return s.contains("yes");}
}).count();
Long cCount = input.filter(new Function<String,Boolean>(){
public Boolean call(String s){return s.contains("ywq");}
}).count();
System.out.println("yes:"+bCount+" ywq:"+cCount+" all?");
// sc.stop();
}
}
Run Code Online (Sandbox Code Playground)
Pom如下:
<dependencies>
<dependency> <!-- Spark dependency -->
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId> …Run Code Online (Sandbox Code Playground)