feature_columns的项必须是_FeatureColumn

Tul*_*mak 3 python machine-learning feature-selection deep-learning tensorflow

我收到此错误:

ValueError:feature_columns的项必须是_FeatureColumn.给定(类型):索引(['CreditScore','Age','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','EstimatedSalary','Exited'],dtype ='object' ).

我正在使用tensorFlow lib.我想获得预测结果,但我无法运行m.train(input_fn=get_input_fn ,steps=5000)代码.无论我做什么,我总是得到同样的错误.我在下面使用了这些输入函数,但没有改变.

def input_fn_train():
     x=tf.constant(df_train.astype(np.float64)),
     y=tf.constant(df_train[LABEL].astype(np.float64))
     return x, y
Run Code Online (Sandbox Code Playgroud)

def get_input_fn(data_set, num_epochs=None, shuffle=False):
     return tf.estimator.inputs.pandas_input_fn(
      x=pd.DataFrame({k: data_set[k].values for k in data_set.columns}),
      y=pd.Series(data_set[LABEL].values), num_epochs=num_epochs,
                  shuffle=shuffle)
Run Code Online (Sandbox Code Playgroud)

我无法理解我该怎么做.错误是什么?我一直在谷歌搜索,但从未找到有用的东西.我该如何处理这个错误.代码如下.谢谢!

import pandas as pd
import tensorflow as tf
import numpy as np
import tempfile

COLS= ["RowNumber","CustomerId","Surname","CreditScore","Geography",
"Gender","Age","Tenure","Balance","NumOfProducts","HasCrCard",
"IsActiveMember","EstimatedSalary","Exited"]


FEATURES = ["CreditScore","Age","Tenure","Balance","NumOfProducts",
       "HasCrCard","IsActiveMember", "EstimatedSalary"]

LABEL="Exited"

df_train = pd.read_csv("Churn_Modelling.csv", skipinitialspace=True, 
header=0)
df_test = pd.read_csv("Churn_Modelling.csv", skipinitialspace=True, 
header=0)
test_label = df_test[LABEL].astype(float)
df_test.drop("Surname", axis = 1, inplace=True)
df_test.drop("RowNumber", axis = 1, inplace=True)
df_test.drop("CustomerId", axis = 1, inplace=True)
df_train.drop("CustomerId", axis = 1, inplace=True)
df_train.drop("Surname", axis = 1, inplace=True)
df_train.drop("RowNumber", axis = 1, inplace=True)
df_train.drop("Geography", axis = 1, inplace=True)
df_train.drop("Gender", axis = 1, inplace=True)

def get_input_fn():
    return {'x': tf.constant(df_train[FEATURES].as_matrix(), tf.float32, 
           df_train.shape),
           'y': tf.constant(df_train[LABEL].as_matrix(), tf.float32, 
            df_train.shape)
           }

 df=df_train.select_dtypes(exclude=['object'])
 numeric_cols=df.columns

 m = tf.estimator.LinearClassifier(model_dir=model_dir, feature_columns=
[numeric_cols])

 m.train(input_fn=get_input_fn ,steps=5000)
 results = m.evaluate(input_fn= get_input_fn(df_test, num_epochs=1, 
 shuffle=False),steps=None)

 y = m.predict(input_fn=get_input_fn(df_test, num_epochs=1, shuffle=False))
 pred = list(y)

 rowNumber=0
 for i in pred:
     print(str(rowNumber)+': '+str(pred[i]))
     rowNumber=rowNumber+1
Run Code Online (Sandbox Code Playgroud)

Max*_*xim 5

你的第一个错误就是如何创造tf.estimator.LinearClassifier.你传递数据帧索引df.columnsfeature_columns,但应该通过列表tensorflow专题栏目.列应该定义它是数字还是分类,在后一种情况下是编码类型.

其次,由于您正在读取pandas数据帧,因此输入函数可以简化很多.只是用tf.estimator.inputs.pandas_input_fn.

.csv很可能与众不同,我制作了一个带有一些价值的假人.所以这是一种读取输入并正确拟合模型的方法:

import pandas as pd
import tensorflow as tf

FEATURES = ["CreditScore", "Age", "Tenure", "Balance", "NumOfProducts", 
            "HasCrCard", "IsActiveMember", "EstimatedSalary", "Exited"]

credit_score = tf.feature_column.numeric_column("CreditScore")
age = tf.feature_column.numeric_column("Age")
tenure = tf.feature_column.numeric_column("Tenure")
balance = tf.feature_column.numeric_column("Balance")
num_of_products = tf.feature_column.numeric_column("NumOfProducts")
has_card = tf.feature_column.categorical_column_with_vocabulary_list("HasCrCard", ["True", "False"])
is_active_member = tf.feature_column.categorical_column_with_vocabulary_list("IsActiveMember", ["True", "False"])
estimated_salary = tf.feature_column.numeric_column("EstimatedSalary")
feature_columns = [credit_score, age, tenure, balance, num_of_products, has_card, is_active_member, estimated_salary]

def input_fn(num_epochs=None, shuffle=True, batch_size=100):
  df = pd.read_csv('Churn_Modelling.csv',
                   names=FEATURES,
                   dtype={'HasCrCard': str, 'IsActiveMember': str},
                   skipinitialspace=True,
                   header=0)
  df = df.dropna(how='any', axis=0)   # remove NaN elements
  labels = df["Exited"]
  return tf.estimator.inputs.pandas_input_fn(x=df,
                                             y=labels,
                                             batch_size=batch_size,
                                             num_epochs=num_epochs,
                                             shuffle=shuffle,
                                             num_threads=5)

model = tf.estimator.LinearClassifier(model_dir=None,
                                      feature_columns=feature_columns)
model.train(input_fn=input_fn(), steps=100)
Run Code Online (Sandbox Code Playgroud)


Tul*_*mak 1

它正在清晰地工作。

import pandas as pd
import tensorflow as tf
import tempfile
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score


def split_data(data, rate, label):
    data = data.dropna()

    train_data, test_data = train_test_split(data, test_size=rate)

    train_label = train_data[label]
    train_data = train_data.drop(label, 1)

    test_label = test_data[label]
    test_data = test_data.drop(label, 1)
    return train_data, train_label, test_data, test_label



LABEL = "Exited"

data = pd.read_csv("Churn_Modelling.csv", skipinitialspace=True, 
    header=0)

data.drop("Surname", axis=1, inplace=True)
data.drop("RowNumber", axis=1, inplace=True)
data.drop("CustomerId", axis=1, inplace=True)
data.drop("Geography", axis=1, inplace=True)
data.drop("Gender", axis=1, inplace=True)
x_train, y_train, x_test, y_test = split_data(data, 0.20, LABEL)



def get_input_fn_train():
    input_fn = tf.estimator.inputs.pandas_input_fn(
        x=x_train,
        y=y_train,
        shuffle=False
    )
    return input_fn

def get_input_fn_test():
    input_fn = tf.estimator.inputs.pandas_input_fn(
        x=x_test,
        y=y_test,
        shuffle=False
    )
    return input_fn


feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input_fn
(get_input_fn_train())


model_dir = tempfile.mkdtemp()
m = tf.estimator.LinearClassifier(model_dir=model_dir, 
feature_columns=feature_columns)

# train data
m.train(input_fn=get_input_fn_train(), steps=5000)

# you can get accuracy, accuracy_baseline, auc, auc_precision_recall, 
#average_loss, global_step, label/mean, lossprediction/mean

results = m.evaluate(input_fn=get_input_fn_test(), steps=None)

print("model directory = %s" % model_dir)
for key in sorted(results):
    print("%s: %s" % (key, results[key]))

# get prediction results
y = m.predict(input_fn=get_input_fn_test())
predictions = list(y)
pred1=pd.DataFrame(data=predictions)
prediction=pd.DataFrame(data=pred1['class_ids'])
pred=[]
for row in prediction["class_ids"]:
    pred.append(row[0])

rowNumber = 0
for i in pred:
    print(str(rowNumber) + ': ' + str(i))
    rowNumber = rowNumber + 1


def calculate(prediction, LABEL):
    arr = {"accuracy": accuracy_score(prediction, LABEL),
           "report": classification_report(prediction, LABEL),
           "Confusion_Matrix": confusion_matrix(prediction, LABEL),
           "F1 score": f1_score(prediction, LABEL),
           "Recall Score": recall_score(prediction, LABEL),
           "cohen_kappa": cohen_kappa_score(prediction, LABEL)
           }
    return arr


pred2 = pd.DataFrame(data=pred)

print(calculate(pred2.round(), y_test))
Run Code Online (Sandbox Code Playgroud)