Keras NN 损失没有减少

Question

Keras NN 损失没有减少

DSP*_*209 7 python-3.x conv-neural-network keras tensorflow-gpu

我在 Windows 10 上使用 Keras 2.07、Python 3.5、Tensorflow 1.3.0

我正在测试论文 Long-term Temporal Convolutions for Action Recognition 中使用的架构。我希望将它用于我自己的数据。我使用了我认为会很快收敛的测试数据。学习率下降到 1.e-6，但损失从未脱离 4.5549672 的高值......

有人可以查看或尝试此代码吗？我是否假设错误，编码错误或不耐烦？

谢谢

## Attempt to implement based on ...
#Long-term Temporal Convolutionsfor Action Recognition
#
#Gul Varol, Ivan Laptev, and Cordelia Schmid, ¨ Fellow, IEEE
#
import time
import numpy as np,cv2
import sys
import os

import keras
import tensorflow as tf

from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten,  Input
from keras.layers import Conv3D, MaxPooling3D
from keras.layers import ZeroPadding3D, AveragePooling3D, MaxPooling3D,LeakyReLU
from keras.optimizers import SGD,rmsprop, adam, Adagrad, Nadam, Adadelta

from keras import regularizers


from keras import backend as K


K.clear_session()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=.99, allocator_type = 'BFC') 
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True, gpu_options=gpu_options))
config = tf.ConfigProto()


#path information
sample_root_path = "C:\\"

cur=os.chdir(sample_root_path )




batch_size = 15
num_classes = 1
epochs = 5000
my_lambda = 0.

nchan = 3

m_loss = 'binary_crossentropy'

m_opt = Nadam(lr=0.02, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)
#
m_met = ['binary_accuracy'] 

#test data
nvideos = 300
nframes = 30
nrows = 60
ncols = 80
nchan = 3

x_flow = np.ones((nvideos, nframes,nrows,ncols,nchan),np.float32)
x_flow[150:] =-1.0
y_flow = np.ones((nvideos),np.float32)
y_flow[150:]= 0




t_inp =Input(shape=(nframes,nrows,ncols,nchan),name='t_inp')

t =  Conv3D(64, (3, 3,3), activation='relu',padding="same", name="conv1", strides=(1, 1, 1),
                 kernel_initializer = 'glorot_normal',
                 bias_initializer = 'glorot_normal', 
                 bias_regularizer = regularizers.l1(my_lambda), 
                 trainable=True) (t_inp)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
t = MaxPooling3D(pool_size=(2,2,1), strides=(2, 2, 2),name='pool1') (t)


t =  Conv3D(128, (3, 3,3), activation='relu',padding="same", name="conv2", strides=(1, 1, 1), 
                  bias_regularizer = regularizers.l1(my_lambda),trainable=True) (t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)

t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool2') (t)




t =  Conv3D(256, (3, 3,3), activation='relu',padding="same", name="conv3", strides=(1, 1, 1), 
                 bias_regularizer = regularizers.l1(my_lambda),trainable=True) (t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)

t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool3') (t)



t =  Conv3D(256, (3, 3,3), activation='relu',padding="same", name="conv4", strides=(1, 1, 1), 
                 bias_regularizer = regularizers.l1(my_lambda), trainable=True) (t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)

t = MaxPooling3D(pool_size=(2,2,2), strides=(2, 2, 2),name='pool4') (t)



t =  Conv3D(256, (3, 3,3), activation='relu',padding="same", name="conv5", strides=(1, 1, 1), 
                 bias_regularizer = regularizers.l1(my_lambda), trainable=True) (t)
t = LeakyReLU(alpha=0.3,trainable=True) (t)
#
t = MaxPooling3D(pool_size=(1,1,1), strides=(1, 1, 1),name='pool5') (t)
##*****************************

t = Flatten () (t)
t = Dense((2048),name='s_den00') (t)
t = Dropout(.5) (t)
t = Dense((2048),name='s_den0') (t)
t = Dropout(.5) (t)


t = Dense((num_classes),activation='softmax',name='s_den1') (t)


model = Model(inputs=t_inp,outputs=t)
print (model.summary())

model.compile(loss=m_loss, optimizer=m_opt, metrics=m_met)

print ('compiled model')
tb = keras.callbacks.TensorBoard(log_dir=sample_root_path+'logs', histogram_freq=0,
                          write_graph=True, write_grads=True, write_images=True)

reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,verbose=1, patience=3, min_lr=0.000001)

with tf.device('/gpu:0'):
  history = model.fit(x_flow, y_flow,
                      batch_size=batch_size,
                      callbacks=[reduce_lr,
                                 tb],
                      verbose=1,
                      validation_split=.3,
                      shuffle = True,
                      epochs=epochs)

print ('done')

Run Code Online (Sandbox Code Playgroud)

Answer 1

DJK*_*DJK 4

问题是输出层激活函数。softmax如果输出数组如下所示，则只能用于两类结构

[[1,0],
 [1,0],
 [0,1]]

Run Code Online (Sandbox Code Playgroud)

softmax假设输出总和为 1，因此如果您对输入值进行预测，您会发现它们全部为 1，因此损失永远不会减少。您可以像上面那样设置输出，也可以保持相同，作为单个二进制列，如下所示

[[1],
 [1],
 [0]]

Run Code Online (Sandbox Code Playgroud)

并使用sigmoid激活函数，在 0-1 范围内优化输出。所以你的最后一层将是

t = Dense((num_classes),activation='sigmoid',name='s_den1') (t)

Run Code Online (Sandbox Code Playgroud)

我回到原来的论文，看到了他们的lr策略。更改了代码，它的学习速度确实非常慢。因此，在您纠正后，问题是我错误地假设测试数据会学得更快，而且我不耐烦！ (2认同)

归档时间：	8 年，11 月前
查看次数：	5585 次
最近记录：	8 年，11 月前