我正在尝试在 python 中运行以下代码:
from pyspark.sql.types import StringType
from pyspark.sql.functions import udf
from pyspark import SparkContext
from pyspark import SparkConf
import pyspark.sql.functions as pf
import logging
import sys
from pyspark.sql import SQLContext
log = logging.getLogger('EXT')
class Test:
def __init__(self):
pass
def ext_udf(self, f):
return udf(lambda f: self.test(1))
def test(self,arg):
return(arg)
def create_df(self):
log.info("Test")
log.debug("Test")
conf = SparkConf().setAppName('Extr')
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)
df = SQLContext.createDataFrame(sqlContext,[{'name': 'Alice', 'age': 1}])
df.withColumn('meta-data', self.ext_udf(1)(pf.col("name"))).show()
if __name__ == "__main__":
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
t=Test()
t.create_df()
Run Code Online (Sandbox Code Playgroud)
我知道,这根本没有意义,但它确实重现了我的错误。
Could …Run Code Online (Sandbox Code Playgroud)