我的代码:
from numpy import *
def pca(orig_data):
    """Print the SVD of the column-standardized data.

    Each column of ``orig_data`` is shifted to zero mean and divided by its
    population standard deviation (``std`` along axis 0), and the result is
    decomposed with ``linalg.svd``.

    Args:
        orig_data: 2-D array-like of numbers. Statistics are taken along
            axis 0, so rows act as samples and columns as features.

    Returns:
        None. The singular values ``s`` are printed first, then the matrix
        ``v`` returned by ``linalg.svd`` (one right singular vector per row).
    """
    data = array(orig_data)
    spread = data.std(axis=0)
    # A constant column has zero spread; dividing by it would fill the column
    # with NaN and make the SVD fail. Scaling by 1 keeps it as centered zeros.
    spread = where(spread == 0, 1.0, spread)
    data = (data - data.mean(axis=0)) / spread
    _u, s, v = linalg.svd(data)
    # NOTE: s holds singular values, not eigenvalues; the eigenvalues of
    # data.T.dot(data) are s**2.
    print(s)
    print(v)
def load_iris(path):
    """Load the numeric columns of a comma-separated text file.

    Every non-blank line is split on commas. The last field is dropped
    (presumably the class label -- confirm against the data file) and the
    remaining fields are parsed as floats.

    Args:
        path: Path of the text file to read.

    Returns:
        A numpy array with one row per non-blank line of the file.

    Raises:
        ValueError: If a remaining field cannot be parsed as a float.
    """
    rows = []
    with open(path) as input_file:
        for line in input_file:
            line = line.strip()
            # Blank lines (e.g. a trailing empty line at the end of the file)
            # carry no features; without this guard they would add an empty
            # row and make the resulting array ragged.
            if not line:
                continue
            fields = line.split(',')
            rows.append([float(elem) for elem in fields[:-1]])
    return array(rows)
if __name__ == '__main__':
    # Script entry point: load 'iris.data' (path relative to the working
    # directory) and hand the resulting array straight to pca().
    pca(load_iris('iris.data'))
Run Code Online (Sandbox Code Playgroud)
鸢尾花(Iris)数据集:http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data
输出:
[ 20.89551896 …
Run Code Online (Sandbox Code Playgroud)