无法使用 keras.load_model 保存/加载模型 - IndexError: list index out of range

Sto*_*son 9 python save keras tensorflow

我正在使用 tf 和 Keras 按照此处此处使用的方法创建 cycleGAN

网络结构相当复杂:很多模型相互嵌套。

我无法保存和重新加载经过训练的模型。

培训完成后,我使用

 generator_AtoB.save("models/generator_AtoB.h5")
Run Code Online (Sandbox Code Playgroud)

pickle.dump(generator_AtoB, saveFile)
Run Code Online (Sandbox Code Playgroud)

保存模型:这不会导致错误,并在提供的路径中创建一个文件。

通过检查h5dump | less我可以看到 .h5 文件包含数据。

稍后使用 keras 重新加载模型:

generator_AtoB = load_model("models/generator_AtoB.h5")
Run Code Online (Sandbox Code Playgroud)

或泡菜:

pickle.load(saveFile)
Run Code Online (Sandbox Code Playgroud)

导致错误:

Traceback (most recent call last):
File "test_model.py", line 14, in <module>
generator_AtoB = pickle.load(saveFile)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/network.py", line 1266, in __setstate__
model = saving.unpickle_model(state)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 435, in unpickle_model
return _deserialize_model(f)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 274, in _deserialize_model
reshape=False)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 682, in preprocess_weights_for_loading
weights = convert_nested_model(weights)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 658, in convert_nested_model
original_backend=original_backend))
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 682, in preprocess_weights_for_loading
weights = convert_nested_model(weights)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 670, in convert_nested_model
original_backend=original_backend))
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 682, in preprocess_weights_for_loading
weights = convert_nested_model(weights)
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 658, in convert_nested_model
original_backend=original_backend))
File "/home/MYUSERNAME/.virtualenvs/tensorflow_py3/lib/python3.5/site-packages/keras/engine/saving.py", line 800, in preprocess_weights_for_loading
elif layer_weights_shape != weights[0].shape:
IndexError: list index out of range
Run Code Online (Sandbox Code Playgroud)

如果使用keras.load_model或,错误是相同的pickle.load

由于这是通过使用 keras 或 pickle 保存而发生的,我认为这不是保存/加载问题,而是我在模型中错误地保存的东西,但我在任何地方都找不到任何参考。

感谢您的帮助

完整代码:

    #!/usr/bin/env python
# -*- coding: UTF-8 -*-

# https://hardikbansal.github.io/CycleGANBlog/
import sys
import time

import numpy as np
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Input, multiply, add as kadd
from keras.layers import Conv2D, BatchNormalization, Conv2DTranspose
from keras.layers import LeakyReLU, ReLU
from keras.layers import Activation

from keras.preprocessing.image import ImageDataGenerator

from PIL import Image



ngf = 32 # Number of filters in first layer of generator
ndf = 64 # Number of filters in first layer of discriminator
BATCH_SIZE = 1 # batch_size
pool_size = 50 # pool_size
IMG_WIDTH = 256 # Imput image will of width 256
IMG_HEIGHT = 256 # Input image will be of height 256
IMG_DEPTH = 3 # RGB format

DISCRIMINATOR_ITERATIONS = 1
SAVE_IMAGES_INTERVAL = 25

ITERATIONS = 5000
FAKE_POOL_SIZE=25
INPUT_SHAPE = (IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH)


def resnet_block(num_features):

    block = Sequential()
    block.add(Conv2D(num_features, kernel_size=3, strides=1, padding="SAME"))
    block.add(BatchNormalization())
    block.add(ReLU())
    block.add(Conv2D(num_features, kernel_size=3, strides=1, padding="SAME"))
    block.add(BatchNormalization())
    block.add(ReLU())


    resblock_input = Input(shape=(64, 64, 256))
    conv_model = block(resblock_input)

    _sum = kadd([resblock_input, conv_model])

    composed =  Model(inputs=[resblock_input], outputs=_sum)
    return composed


def discriminator( f=4, name=None):
    d = Sequential()
    d.add(Conv2D(ndf, kernel_size=f, strides=2, padding="SAME", name="discr_conv2d_1"))
    d.add(BatchNormalization())
    d.add(LeakyReLU(0.2))
    d.add(Conv2D(ndf * 2, kernel_size=f, strides=2, padding="SAME", name="discr_conv2d_2"))
    d.add(BatchNormalization())
    d.add(LeakyReLU(0.2))
    d.add(Conv2D(ndf * 4, kernel_size=f, strides=2, padding="SAME", name="discr_conv2d_3"))
    d.add(BatchNormalization())
    d.add(LeakyReLU(0.2))
    d.add(Conv2D(ndf * 8, kernel_size=f, strides=2, padding="SAME", name="discr_conv2d_4"))
    d.add(BatchNormalization())
    d.add(LeakyReLU(0.2))
    d.add(Conv2D(1, kernel_size=f, strides=1, padding="SAME", name="discr_conv2d_out"))

    # d.add(Activation("sigmoid"))


    model_input = Input(shape=INPUT_SHAPE)

    decision  = d(model_input)

    composed = Model(model_input, decision)
    # print(d.output_shape)
    # d.summary()

    return composed

def generator(name=None):

    g = Sequential()
    # ENCODER
    g.add(Conv2D(ngf, kernel_size=7,
            strides=1,
            # activation='relu',
            padding='SAME',
            input_shape=INPUT_SHAPE,
            name="encoder_0" ))


    g.add(Conv2D(64*2, kernel_size=3,
            strides=2,
            padding='SAME',
            name="encoder_1" ))
    # output shape = (128, 128, 128)


    g.add(Conv2D(64*4, kernel_size=3,
            padding="SAME",
            strides=2,))
    # output shape = (64, 64, 256)

    # END ENCODER


    # TRANSFORM

    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))
    g.add(resnet_block(64*4))

    # END TRANSFORM
    # generator.shape = (64, 64, 256)

    # DECODER

    g.add(Conv2DTranspose(ngf*2,kernel_size=3, strides=2, padding="SAME"))
    g.add(Conv2DTranspose(ngf*2,kernel_size=3, strides=2, padding="SAME"))

    g.add(Conv2D(3,kernel_size=7, strides=1, padding="SAME"))

    # END DECODER

    model_input = Input(shape=INPUT_SHAPE)
    generated_image = g(model_input)

    composed = Model(model_input, generated_image, name=name)
    return composed


def fromMinusOneToOne(x):
    return x/127.5 -1

def toRGB(x):
    return (1+x) * 127.5


def createImageGenerator( subset="train", data_type="A", batch_size=1, pp=None):

    # we create two instances with the same arguments
    data_gen_args = dict(
                         preprocessing_function= pp,
                         zoom_range=0.1)

    image_datagen = ImageDataGenerator(**data_gen_args)

    # Provide the same seed and keyword arguments to the fit and flow methods
    seed = 1

    image_directory=subset+data_type
    print('data/vangogh2photo/'+image_directory)
    image_generator = image_datagen.flow_from_directory(
        'data/vangogh2photo/'+image_directory,
        class_mode=None,
        batch_size=batch_size,
        seed=seed)

    return image_generator


if __name__ == '__main__':

    generator_AtoB = generator(name="gen_A")
    generator_BtoA = generator(name="gen_B")

    discriminator_A = discriminator(name="disc_A")
    discriminator_B = discriminator(name="disc_B")


    # input_A = Input(batch_shape=(batch_size, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="input_A")
    input_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="input_A")
    generated_B = generator_AtoB(input_A)
    discriminator_generated_B = discriminator_B(generated_B)
    cyc_A = generator_BtoA(generated_B)


    input_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="input_B")
    generated_A = generator_BtoA(input_B)
    discriminator_generated_A = discriminator_A(generated_A )
    cyc_B = generator_AtoB(generated_A)


    ### GENERATOR TRAINING
    optim = keras.optimizers.Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=1e-08)


    # cyclic error is increased, because it's more important
    cyclic_weight_multipier = 10

    generator_trainer =  Model([input_A, input_B],
                 [discriminator_generated_B,   discriminator_generated_A,
                 cyc_A,      cyc_B,])

    losses =         [ "MSE", "MSE", "MAE",                   "MAE"]
    losses_weights = [ 1,     1,     cyclic_weight_multipier, cyclic_weight_multipier]

    generator_trainer.compile(optimizer=optim, loss = losses, loss_weights=losses_weights)



    ### DISCRIMINATOR TRAINING

    disc_optim = keras.optimizers.Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=1e-08)

    real_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_real_A")
    real_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_real_B")

    generated_A = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_gen_A")
    generated_B = Input(batch_shape=(None, IMG_WIDTH, IMG_HEIGHT, IMG_DEPTH), name="in_gen_B")

    discriminator_real_A = discriminator_A(real_A)
    discriminator_generated_A = discriminator_A(generated_A)
    discriminator_real_B =  discriminator_B(real_B)
    discriminator_generated_B = discriminator_B(generated_B)

    disc_trainer = Model([real_A, generated_A, real_B, generated_B],
                         [  discriminator_real_A,
                            discriminator_generated_A,
                            discriminator_real_B,
                            discriminator_generated_B] )


    disc_trainer.compile(optimizer=disc_optim, loss = 'MSE')


    #########
    ##
    ## TRAINING
    ##
    #########


    fake_A_pool = []
    fake_B_pool = []


    ones = np.ones((BATCH_SIZE,)+ generator_trainer.output_shape[0][1:])
    zeros = np.zeros((BATCH_SIZE,)+ generator_trainer.output_shape[0][1:])


    train_A_image_generator = createImageGenerator("train", "A")
    train_B_image_generator = createImageGenerator("train", "B")


    it = 1
    while it  < ITERATIONS:
        start = time.time()
        print("\nIteration %d " % it)
        sys.stdout.flush()

        # THIS ONLY WORKS IF BATCH SIZE == 1
        real_A = train_A_image_generator.next()

        real_B = train_B_image_generator.next()

        fake_A_pool.extend(generator_BtoA.predict(real_B))
        fake_B_pool.extend(generator_AtoB.predict(real_A))

        #resize pool
        fake_A_pool = fake_A_pool[-FAKE_POOL_SIZE:]
        fake_B_pool = fake_B_pool[-FAKE_POOL_SIZE:]

        fake_A = [ fake_A_pool[ind] for ind in np.random.choice(len(fake_A_pool), size=(BATCH_SIZE,), replace=False) ]
        fake_B = [ fake_B_pool[ind] for ind in np.random.choice(len(fake_B_pool), size=(BATCH_SIZE,), replace=False) ]

        fake_A = np.array(fake_A)
        fake_B = np.array(fake_B)


        for x in range(0, DISCRIMINATOR_ITERATIONS):
            _, D_loss_real_A, D_loss_fake_A, D_loss_real_B, D_loss_fake_B = \
            disc_trainer.train_on_batch(
                [real_A, fake_A, real_B, fake_B],
                [zeros, ones * 0.9, zeros, ones * 0.9] )


        print("=====")
        print("Discriminator loss:")
        print("Real A: %s, Fake A: %s || Real B: %s, Fake B: %s " % ( D_loss_real_A, D_loss_fake_A, D_loss_real_B, D_loss_fake_B))

        _, G_loss_fake_B, G_loss_fake_A, G_loss_rec_A, G_loss_rec_B = \
            generator_trainer.train_on_batch(
                [real_A, real_B],
                [zeros, zeros, real_A, real_B])


        print("=====")
        print("Generator loss:")
        print("Fake B: %s, Cyclic A: %s || Fake A: %s, Cyclic B: %s " % (G_loss_fake_B, G_loss_rec_A, G_loss_fake_A, G_loss_rec_B))

        end = time.time()
        print("Iteration time: %s s" % (end-start))
        sys.stdout.flush()

        if not (it % SAVE_IMAGES_INTERVAL ):
            imgA = real_A
            # print(imgA.shape)
            imga2b = generator_AtoB.predict(imgA)
            # print(imga2b.shape)
            imga2b2a = generator_BtoA.predict(imga2b)
            # print(imga2b2a.shape)
            imgB = real_B
            imgb2a = generator_BtoA.predict(imgB)
            imgb2a2b = generator_AtoB.predict(imgb2a)

            c = np.concatenate([imgA, imga2b, imga2b2a, imgB, imgb2a, imgb2a2b], axis=2).astype(np.uint8)
            # print(c.shape)
            x = Image.fromarray(c[0])
            x.save("data/generated/iteration_%s.jpg" % str(it).zfill(4))

        it+=1


        generator_AtoB.save("models/generator_AtoB.h5")
        generator_BtoA.save("models/generator_BtoA.h5")
Run Code Online (Sandbox Code Playgroud)

小智 3

我也有同样的问题。Ankish 建议使用 tf.keras API 尝试解决这个问题。我不知道为什么,但是...

tf.keras.models.load_model("./saved_models/our_model.h5", compile=False)
Run Code Online (Sandbox Code Playgroud)

工作完美,同时

keras.models.load_model("./saved_models/our_model.h5")
Run Code Online (Sandbox Code Playgroud)

失败并出现此处列出的错误。将编译标志设置为 false 只是为了隐藏警告。