我想把一个包含自定义类作为列的 Spark DataFrame 保存为 Parquet 文件。该类由另一个自定义类的 Seq 组成。为此，我以与 VectorUDT 类似的方式为每个类创建了一个 UserDefinedType 类。我可以按预期使用该 DataFrame，但无法将它作为 Parquet（或 JSON）保存到磁盘。我已将其报告为错误，但也可能是我的代码本身有问题。下面是一个更简单的示例来重现该问题：
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, SparkContext}
// Example value class used as a DataFrame column: wraps a sequence of B
// instances. The annotation tells Spark SQL to convert A to/from its
// Catalyst representation via AUDT.
@SQLUserDefinedType(udt = classOf[AUDT])
case class A(list:Seq[B])
class AUDT extends UserDefinedType[A] {
// Catalyst schema of the serialized form: a single-field struct whose
// "list" field is an array of B values in their own UDT representation.
override def sqlType: DataType = StructType(Seq(StructField("list", ArrayType(BUDT, containsNull = false), nullable = true)))
// The JVM class this UDT maps to on the user-facing side.
override def userClass: Class[A] = classOf[A]
/**
 * Converts an [[A]] into its Catalyst representation (an InternalRow whose
 * single field is an array).
 *
 * Bug fix: the original code stored the raw B objects in the
 * GenericArrayData. That appears to work for in-memory DataFrame
 * operations, but the Parquet/JSON writers expect every nested value to
 * already be in its Catalyst form, so the save failed. Nested UDT values
 * must be converted via their own UDT's serialize.
 */
override def serialize(obj: Any): Any = obj match {
  case A(list) =>
    val row = new GenericMutableRow(1)
    // Serialize each element with BUDT so the array holds Catalyst values,
    // not user-class instances.
    row.update(0, new GenericArrayData(list.map(x => BUDT.serialize(x)).toArray))
    row
}
override def deserialize(datum: Any): A = {
datum match {
case row: InternalRow => new …
Run Code Online (Sandbox Code Playgroud)