Luk*_*uhn 5 python machine-learning python-3.x tensorflow
我试图从这里的数据集中的值“名称”、“平台”、“流派”、“出版商”和“年份”来预测全球销售额:https : //www.kaggle.com/gregorut/videogamesales
这是我训练模型的代码:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib
import tensorflow as tf
dftrain = pd.read_csv('./vgsales_eval.csv')
dfeval = pd.read_csv('./vgsales_train.csv')
print(dftrain[dftrain.isnull().any(axis=1)])
y_train = dftrain.pop('Global_Sales')
y_eval = dfeval.pop('Global_Sales')
CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']
feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
vocabulary = dftrain[feature_name].unique() # gets a list of all unique values from given feature column
feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))
for feature_name in NUMERIC_COLUMNS:
feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int64))
print(feature_columns)
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
def input_function():
ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
if shuffle:
ds = ds.shuffle(1000)
ds = ds.batch(batch_size).repeat(num_epochs)
return ds
return input_function
train_input_fn = make_input_fn(dftrain, y_train)
eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_fn)
Run Code Online (Sandbox Code Playgroud)
我收到以下错误:
Traceback (most recent call last):
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 93, in normalize_element
spec = type_spec_from_value(t, use_fallback=False)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 466, in type_spec_from_value
(element, type(element).__name__))
TypeError: Could not build a TypeSpec for 0 Tecmo Koei
1 Nippon Ichi Software
2 Ubisoft
3 Activision
4 Atari
...
6594 Kemco
6595 Infogrames
6596 Activision
6597 7G//AMES
6598 Wanadoo
Name: Publisher, Length: 6599, dtype: object with type Series
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 45, in <module>
linear_est.train(train_input_fn)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 349, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1175, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1201, in _train_model_default
self._get_features_and_labels_from_input_fn(input_fn, ModeKeys.TRAIN))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1037, in _get_features_and_labels_from_input_fn
self._call_input_fn(input_fn, mode))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1130, in _call_input_fn
return input_fn(**kwargs)
File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 34, in input_function
ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 682, in from_tensor_slices
return TensorSliceDataset(tensors)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 3001, in __init__
element = structure.normalize_element(element)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 98, in normalize_element
ops.convert_to_tensor(t, name="component_%d" % i))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1499, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 338, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
allow_broadcast=True)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 282, in _constant_impl
allow_broadcast=allow_broadcast))
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 563, in make_tensor_proto
append_fn(tensor_proto, proto_values)
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in SlowAppendObjectArrayToTensorProto
tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in <listcomp>
tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\compat.py", line 87, in as_bytes
(bytes_or_text,))
TypeError: Expected binary or unicode string, got nan
Run Code Online (Sandbox Code Playgroud)
我在这里做错了什么?是数据集有问题还是我必须以不同的方式读取值?
这基本上与null您获取的数据中存在的值有关,您需要在加载数据时对其进行处理。
我做了一些改变。
df.fillna根据数据类型,根据需要填写的列和值来执行。Year数据类型从更改float为int。因为这会导致另一个问题tensor_slices。下面是修改后的代码,其数据与您所获取的数据相同。
df = pd.read_csv('/content/vgsales.csv')
# print(df.head())
print(df[df.isnull().any(axis=1)])
# df.fillna('', inplace=True)
df.dropna(how="any",inplace = True)
df.Year = df.Year.astype(int)
CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']
feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
vocabulary = df[feature_name].unique() # gets a list of all unique values from given feature column
feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))
for feature_name in NUMERIC_COLUMNS:
feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int64))
print(feature_columns)
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
def input_function():
ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
if shuffle:
ds = ds.shuffle(1000)
ds = ds.batch(batch_size).repeat(num_epochs)
return ds
return input_function
train_input_fn = make_input_fn(df, y_train)
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
4348 次 |
| 最近记录: |