I am trying to use the analytic/window function last_value in Spark Java.
select sno, name, addr1, addr2, run_dt,
       last_value(addr1 ignore nulls) over (
           partition by sno, name, addr1, addr2, run_dt
           order by beg_ts, end_ts
           rows between unbounded preceding and unbounded following
       ) as last_addr1
from daily
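(Note: the "ignore nulls" clause inside last_value above is Oracle-style syntax. As far as I can tell, Spark SQL expects the null-skipping flag as a second argument instead, e.g. last_value(addr1, true) or last(addr1, true), and the DataFrame API exposes the same behavior as functions.last(col, ignoreNulls). A Java sketch using that is shown after the setup code below.)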
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.expressions.Window;
import org.apache.spark.sql.expressions.WindowSpec;
// org.apache.spark.sql.execution.WindowFunctionFrame is a Spark-internal class and should not be imported.
import static org.apache.spark.sql.functions.*; // col(), last(), etc. for building window expressions
SparkConf conf = new SparkConf().setMaster("local").setAppName("Agg");
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlContext = new SQLContext(sc);
JavaRDD<Stgdailydtl> daily = sc.textFile("C:\\Testing.txt").map(
    new Function<String, Stgdailydtl>() { …
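For reference, here is a minimal sketch of the same window in the DataFrame API. It assumes Spark 2.0+, where SparkSession and functions.last(Column, boolean ignoreNulls) are available; the JSON path and the loader are placeholders standing in for the Stgdailydtl mapping above.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.Window;
import org.apache.spark.sql.expressions.WindowSpec;
import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.last;

public class LastAddrSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .master("local")
                .appName("Agg")
                .getOrCreate();

        // Placeholder loader: any source that yields the columns used below.
        Dataset<Row> daily = spark.read().json("C:\\daily.json");

        // Same frame as the SQL: partition, order, unbounded preceding/following.
        // Long.MIN_VALUE / Long.MAX_VALUE are the pre-2.1 spellings of
        // Window.unboundedPreceding() / Window.unboundedFollowing().
        WindowSpec w = Window
                .partitionBy("sno", "name", "addr1", "addr2", "run_dt")
                .orderBy("beg_ts", "end_ts")
                .rowsBetween(Long.MIN_VALUE, Long.MAX_VALUE);

        // last(col, true) skips nulls, matching last_value(... ignore nulls).
        Dataset<Row> result = daily.withColumn("last_addr1",
                last(col("addr1"), true).over(w));
        result.show();
    }
}

On Spark 1.x (SQLContext/DataFrame as in the snippet above), my understanding is that window functions require a HiveContext rather than a plain SQLContext, and the null-skipping form has to be written in SQL as last_value(addr1, true) rather than through the DataFrame API.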