小编Ban*_*nty的帖子

Spark SQL UNION - ORDER BY 列不在 SELECT 中

我正在对两个临时表执行 UNION，并尝试按某一列排序，但 Spark 报错说用于排序的列无法解析。这是 Spark 的 bug，还是我遗漏了什么？

lazy val spark: SparkSession = SparkSession.builder().master("local[*]").getOrCreate()
      import org.apache.spark.sql.types.StringType

      // Sample rows in (id, name, is_old) order; every value is kept as a string.
      val oldOrders = Seq(
        Seq("old_order_id1", "old_order_name1", "true"),
        Seq("old_order_id2", "old_order_name2", "true"))

      val newOrders = Seq(
        Seq("new_order_id1", "new_order_name1", "false"),
        Seq("new_order_id2", "new_order_name2", "false"))

      // Three string columns, added in the same order as the values in each sample row.
      val schema = Seq("id", "name", "is_old")
        .foldLeft(new StructType())((acc, column) => acc.add(column, StringType))

      // Turns the raw string rows into a DataFrame that uses the shared schema.
      def toOrdersDF(rows: Seq[Seq[String]]) = {
        val rowRDD = spark.sparkContext.parallelize(rows.map(values => Row.fromSeq(values)))
        spark.createDataFrame(rowRDD, schema)
      }

      val oldOrdersDF = toOrdersDF(oldOrders)
      val newOrdersDF = toOrdersDF(newOrders)

      // Register both DataFrames as temp views so they can be queried by name via spark.sql.
      oldOrdersDF.createOrReplaceTempView("old_orders")
      newOrdersDF.createOrReplaceTempView("new_orders")

      //ordering by column not in select works if I'm not doing UNION
      spark.sql(
        """
          |SELECT oo.id, oo.name FROM …
Run Code Online (Sandbox Code Playgroud)

apache-spark apache-spark-sql

3
推荐指数
1
解决办法
2万
查看次数

标签 统计

apache-spark ×1

apache-spark-sql ×1