我正在尝试用下面的方法建立一个聚类(clustering)机制,
步骤如下:
在 BigQuery 中创建用户级别的特征表示
例如,特征表的样子如下:
userid |x1 |x2 |x3 |x4 |x5 |x6 |x7 |x8 |x9 |x10
00013 |0.01 | 0 |0 |0 |0 |0 |0 |0.06 |0.09 | 0.001
#!/usr/bin/python
"""BigQuery I/O PySpark example."""
import json
import pprint
import subprocess
import pyspark
import numpy as np
from pyspark.ml.clustering import KMeans
from pyspark import SparkContext
from pyspark.ml import Pipeline
from pyspark.sql import SQLContext
from pyspark.mllib.linalg import Vectors, _convert_to_vector
…(代码在此处被截断)
标签: python apache-spark pyspark apache-spark-ml apache-spark-mllib