Custom Neural Network Implementation on MNIST using Tensorflow 2.0?

use*_*396 18 python neural-network python-3.x tensorflow tensorflow2.0

I tried to write a custom implementation of basic neural network with two hidden layers on MNIST dataset using *TensorFlow 2.0 beta* but I'm not sure what went wrong here but my training loss and accuracy seems to stuck at 1.5 and around 85 respectively. But If I build the using Keras I was getting very low training loss and accuracy above 95% with just 8-10 epochs.

I believe that maybe I'm not updating my weights or something? So do I need to assign my new weights which I compute in backprop function backs to their respective weights/bias variables?

I really appreciate if someone could help me out with this and these few more questions that I've mentioned below.

Few more Questions:

1) How to add a Dropout and Batch Normalization layer in this custom implementation? (i.e making it work for both train and test time)

2) How can I use callbacks in this code? i.e (making use of EarlyStopping and ModelCheckpoint callbacks)

3) Is there anything else in my code below that I can optimize further in this code like maybe making use of tensorflow 2.x @tf.function decorator etc.)

4) I would also require to extract the final weights that I obtain for plotting and checking their distributions. To investigate issues like gradient vanishing or exploding. (Eg: Maybe Tensorboard)

5) I also want help in writing this code in a more generalized way so I can easily implement other networks like ConvNets (i.e Conv, MaxPool, etc.) based on this code easily.

Here's my full code for easy reproducibility :

Note: I know I can use high-level API like Keras to build the model much easier but that is not my goal here. Please understand.

import numpy as np
import os
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
import tensorflow as tf
import tensorflow_datasets as tfds

(x_train, y_train), (x_test, y_test) = tfds.load('mnist', split=['train', 'test'], 
                                                  batch_size=-1, as_supervised=True)

# reshaping
x_train = tf.reshape(x_train, shape=(x_train.shape[0], 784))
x_test  = tf.reshape(x_test, shape=(x_test.shape[0], 784))

ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# rescaling
ds_train = ds_train.map(lambda x, y: (tf.cast(x, tf.float32)/255.0, y))

class Model(object):
    def __init__(self, hidden1_size, hidden2_size, device=None):
        # layer sizes along with input and output
        self.input_size, self.output_size, self.device = 784, 10, device
        self.hidden1_size, self.hidden2_size = hidden1_size, hidden2_size
        self.lr_rate = 1e-03

        # weights initializationg
        self.glorot_init = tf.initializers.glorot_uniform(seed=42)
        # weights b/w input to hidden1 --> 1
        self.w_h1 = tf.Variable(self.glorot_init((self.input_size, self.hidden1_size)))
        # weights b/w hidden1 to hidden2 ---> 2
        self.w_h2 = tf.Variable(self.glorot_init((self.hidden1_size, self.hidden2_size)))
        # weights b/w hidden2 to output ---> 3
        self.w_out = tf.Variable(self.glorot_init((self.hidden2_size, self.output_size)))

        # bias initialization
        self.b1 = tf.Variable(self.glorot_init((self.hidden1_size,)))
        self.b2 = tf.Variable(self.glorot_init((self.hidden2_size,)))
        self.b_out = tf.Variable(self.glorot_init((self.output_size,)))

        self.variables = [self.w_h1, self.b1, self.w_h2, self.b2, self.w_out, self.b_out]


    def feed_forward(self, x):
        if self.device is not None:
            with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
                # layer1
                self.layer1 = tf.nn.sigmoid(tf.add(tf.matmul(x, self.w_h1), self.b1))
                # layer2
                self.layer2 = tf.nn.sigmoid(tf.add(tf.matmul(self.layer1,
                                                             self.w_h2), self.b2))
                # output layer
                self.output = tf.nn.softmax(tf.add(tf.matmul(self.layer2,
                                                             self.w_out), self.b_out))
        return self.output

    def loss_fn(self, y_pred, y_true):
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, 
                                                                  logits=y_pred)
        return tf.reduce_mean(self.loss)

    def acc_fn(self, y_pred, y_true):
        y_pred = tf.cast(tf.argmax(y_pred, axis=1), tf.int32)
        y_true = tf.cast(y_true, tf.int32)
        predictions = tf.cast(tf.equal(y_true, y_pred), tf.float32)
        return tf.reduce_mean(predictions)

    def backward_prop(self, batch_xs, batch_ys):
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr_rate)
        with tf.GradientTape() as tape:
            predicted = self.feed_forward(batch_xs)
            step_loss = self.loss_fn(predicted, batch_ys)
        grads = tape.gradient(step_loss, self.variables)
        optimizer.apply_gradients(zip(grads, self.variables))

n_shape = x_train.shape[0]
epochs = 20
batch_size = 128

ds_train = ds_train.repeat().shuffle(n_shape).batch(batch_size).prefetch(batch_size)

neural_net = Model(512, 256, 'gpu')

for epoch in range(epochs):
    no_steps = n_shape//batch_size
    avg_loss = 0.
    avg_acc = 0.
    for (batch_xs, batch_ys) in ds_train.take(no_steps):
        preds = neural_net.feed_forward(batch_xs)
        avg_loss += float(neural_net.loss_fn(preds, batch_ys)/no_steps) 
        avg_acc += float(neural_net.acc_fn(preds, batch_ys) /no_steps)
        neural_net.backward_prop(batch_xs, batch_ys)
    print(f'Epoch: {epoch}, Training Loss: {avg_loss}, Training ACC: {avg_acc}')

# output for 10 epochs:
Epoch: 0, Training Loss: 1.7005115111824125, Training ACC: 0.7603832868262543
Epoch: 1, Training Loss: 1.6052448933478445, Training ACC: 0.8524806404020637
Epoch: 2, Training Loss: 1.5905528008006513, Training ACC: 0.8664196092868224
Epoch: 3, Training Loss: 1.584107405738905, Training ACC: 0.8727630912326276
Epoch: 4, Training Loss: 1.5792385798413306, Training ACC: 0.8773203844903037
Epoch: 5, Training Loss: 1.5759121985174716, Training ACC: 0.8804754322627559
Epoch: 6, Training Loss: 1.5739163148682564, Training ACC: 0.8826455712551251
Epoch: 7, Training Loss: 1.5722616605926305, Training ACC: 0.8840812018606812
Epoch: 8, Training Loss: 1.569699136307463, Training ACC: 0.8867688354803249
Epoch: 9, Training Loss: 1.5679460542742163, Training ACC: 0.8885049475356936
Run Code Online (Sandbox Code Playgroud)

Szy*_*zke 18

我想知道从哪里开始您的多问题,我决定从下面开始声明:

您的代码绝对不应该那样,并且与当前的Tensorflow最佳实践相去甚远

抱歉,但是一步一步地调试它浪费了每个人的时间,并且不会使我们俩受益。

现在,转到第三点:

3)下面的代码中还有什么我可以进一步优化的代码,例如可以使用tensorflow 2.x @ tf.function decorator等)

是的,您可以使用tensorflow2.0功能,而您似乎正在远离这些功能(tf.function装饰器在这里实际上没有用,暂时将其保留)。

遵循新的准则也可以缓解您的第5点问题,即:

5)我还希望以更通用的方式来编写此代码,以便我可以轻松地基于此代码轻松实现其他网络,例如ConvNets(即Conv,MaxPool等)。

因为它是专门为此设计的。稍作介绍后,我将尝试通过几个步骤向您介绍这些概念:

1.将程序分为逻辑部分

Tensorflow在代码可读性方面造成了很大的伤害;tf1.x通常,所有内容都集中在一个地方,全局变量后跟函数定义,然后是另一个全局变量,或者也许是数据加载,所有这些都是一团糟。这并不是开发人员的错,因为系统的设计鼓励了这些行为。

现在,tf2.0鼓励程序员在工作中将其工作划分为类似于可以在中看到的结构pytorchchainer以及其他更加用户友好的框架。

1.1数据加载

您使用Tensorflow数据集走的很好,但没有明显的理由拒绝了。

这是带有注释的代码,发生了什么事:

# You already have tf.data.Dataset objects after load
(x_train, y_train), (x_test, y_test) = tfds.load('mnist', split=['train', 'test'], 
                                                  batch_size=-1, as_supervised=True)

# But you are reshaping them in a strange manner...
x_train = tf.reshape(x_train, shape=(x_train.shape[0], 784))
x_test  = tf.reshape(x_test, shape=(x_test.shape[0], 784))

# And building from slices...
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Unreadable rescaling (there are built-ins for that)
Run Code Online (Sandbox Code Playgroud)

您可以轻松地将这一思想推广到任何数据集,并将其放在单独的模块中,例如datasets.py

import tensorflow as tf
import tensorflow_datasets as tfds


class ImageDatasetCreator:
    @classmethod
    # More portable and readable than dividing by 255
    def _convert_image_dtype(cls, dataset):
        return dataset.map(
            lambda image, label: (
                tf.image.convert_image_dtype(image, tf.float32),
                label,
            )
        )

    def __init__(self, name: str, batch: int, cache: bool = True, split=None):
        # Load dataset, every dataset has default train, test split
        dataset = tfds.load(name, as_supervised=True, split=split)
        # Convert to float range
        try:
            self.train = ImageDatasetCreator._convert_image_dtype(dataset["train"])
            self.test = ImageDatasetCreator._convert_image_dtype(dataset["test"])
        except KeyError as exception:
            raise ValueError(
                f"Dataset {name} does not have train and test, write your own custom dataset handler."
            ) from exception

        if cache:
            self.train = self.train.cache()  # speed things up considerably
            self.test = self.test.cache()

        self.batch: int = batch

    def get_train(self):
        return self.train.shuffle().batch(self.batch).repeat()

    def get_test(self):
        return self.test.batch(self.batch).repeat()
Run Code Online (Sandbox Code Playgroud)

因此,现在您可以mnist使用简单的命令加载更多的内容:

from datasets import ImageDatasetCreator

if __name__ == "__main__":
    dataloader = ImageDatasetCreator("mnist", batch=64, cache = True)
    train, test = dataloader.get_train(), dataloader.get_test()
Run Code Online (Sandbox Code Playgroud)

mnist从现在开始,您可以使用任何其他名称来加载数据集。

请停止编写与深度学习相关的所有内容,并且也要成为程序员

1.2模型创建

由于tf2.0有两种建议的方法,一种可以根据模型的复杂性进行:

  • tensorflow.keras.models.Sequential- @Stewart_R显示了这种方式,无需重申他的观点。用于最简单的模型(您应该将此模型与前馈一起使用)。
  • 继承tensorflow.keras.Model和编写自定义模型。当模块中有某种逻辑或更复杂(例如ResNets,多路径网络等)时,应使用此逻辑。总而言之,更具可读性和可定制性。

您的Model班级试图模仿类似的东西,但是又再次向南移动。backprop绝对不是模型本身的一部分,也不是lossaccuracy将它们分成另一个模块或函数,而不是成员!

就是说,让我们使用第二种方法对网络进行编码(model.py为简便起见,您应该将此代码放入其中)。在此之前,我将YourDense从头开始编写前馈层代码tf.keras.Layers(此代码可能会进入layers.py模块):

import tensorflow as tf

class YourDense(tf.keras.layers.Layer):
    def __init__(self, units):
        # It's Python 3, you don't have to specify super parents explicitly
        super().__init__()
        self.units = units

    # Use build to create variables, as shape can be inferred from previous layers
    # If you were to create layers in __init__, one would have to provide input_shape
    # (same as it occurs in PyTorch for example)
    def build(self, input_shape):
        # You could use different initializers here as well
        self.kernel = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        # You could define bias in __init__ as well as it's not input dependent
        self.bias = self.add_weight(shape=(self.units,), initializer="random_normal")
        # Oh, trainable=True is default

    def call(self, inputs):
        # Use overloaded operators instead of tf.add, better readability
        return tf.matmul(inputs, self.kernel) + self.bias
Run Code Online (Sandbox Code Playgroud)

关于你

1)如何在此自定义实现中添加一个Dropout和Batch Normalization层?(即使其在训练和测试时间内均有效)

我想您想为这些层创建一个自定义实现。如果没有,您可以from tensorflow.keras.layers import Dropout按照@Leevo的说明将其导入并在任何需要的地方使用。在以下期间train和之后具有不同行为的反向辍学test

class CustomDropout(layers.Layer):
    def __init__(self, rate, **kwargs):
        super().__init__(**kwargs)
        self.rate = rate

    def call(self, inputs, training=None):
        if training:
            # You could simply create binary mask and multiply here
            return tf.nn.dropout(inputs, rate=self.rate)
        # You would need to multiply by dropout rate if you were to do that
        return inputs
Run Code Online (Sandbox Code Playgroud)

从此处获取并修改以更好地适合展示目的的图层。

现在,您终于可以创建模型了(简单的双前馈):

import tensorflow as tf

from layers import YourDense


class Model(tf.keras.Model):
    def __init__(self):
        super().__init__()
        # Use Sequential here for readability
        self.network = tf.keras.Sequential(
            [YourDense(100), tf.keras.layers.ReLU(), YourDense(10)]
        )

    def call(self, inputs):
        # You can use non-parametric layers inside call as well
        flattened = tf.keras.layers.Flatten()(inputs)
        return self.network(flattened)
Run Code Online (Sandbox Code Playgroud)

Ofc,在常规实现中应尽可能使用内置函数。

这种结构是相当可扩展的,因此可以泛化为卷积网络,resnets,senets,无论应通过此模块执行什么操作。您可以在此处了解更多信息。

我认为这符合您的第5点:

5)我还希望以更通用的方式来编写此代码,以便我可以轻松地基于此代码轻松实现其他网络,例如ConvNets(即Conv,MaxPool等)。

最后一件事,您可能必须使用model.build(shape)它来构建模型图。

model.build((None, 28, 28, 1))
Run Code Online (Sandbox Code Playgroud)

这将用于MNIST的28x28x1输入形状,其中None代表批处理。

1.3培训

再一次,培训可以通过两种不同的方式进行:

  • 标准Keras-model.fit(dataset)在分类等简单任务中很有用
  • tf.GradientTape-更复杂的训练方案,最突出的例子是“ 生成对抗网络”,其中两个模型在玩minmax游戏时优化正交目标

正如@Leevo再次指出的那样,如果要使用第二种方法,将无法简单地使用Keras提供的回调,因此,我建议尽可能使用第一种方法。

从理论上讲,您可以像on_batch_begin()需要时手动调用回调函数一样,也可以在需要时调用其他函数,但这会很麻烦,而且我不确定这将如何工作。

谈到第一个选项,您可以tf.data.Dataset直接使用适合的对象。这是另一个模块(最好是train.py)中的内容:

def train(
    model: tf.keras.Model,
    path: str,
    train: tf.data.Dataset,
    epochs: int,
    steps_per_epoch: int,
    validation: tf.data.Dataset,
    steps_per_validation: int,
    stopping_epochs: int,
    optimizer=tf.optimizers.Adam(),
):
    model.compile(
        optimizer=optimizer,
        # I used logits as output from the last layer, hence this
        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[tf.metrics.SparseCategoricalAccuracy()],
    )

    model.fit(
        train,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=validation,
        validation_steps=steps_per_validation,
        callbacks=[
            # Tensorboard logging
            tf.keras.callbacks.TensorBoard(
                pathlib.Path("logs")
                / pathlib.Path(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")),
                histogram_freq=1,
            ),
            # Early stopping with best weights preserving
            tf.keras.callbacks.EarlyStopping(
                monitor="val_sparse_categorical_accuracy",
                patience=stopping_epochs,
                restore_best_weights=True,
            ),
        ],
    )
    model.save(path)
Run Code Online (Sandbox Code Playgroud)

更复杂的方法与PyTorch训练循环非常相似(几乎是复制和粘贴),因此,如果您熟悉这些循环,就不会造成太大的问题。

您可以在整个tf2.0文档中找到示例,例如herehere

2.其他事项

2.1未回答的问题

4)代码中还有其他需要进一步优化的内容吗?即(使用tensorflow 2.x @ tf.function装饰器等)

上面已经将Model转换为图形,因此在这种情况下调用它不会使您受益。过早的优化是万恶之源,请记住在执行此操作之前先对代码进行评估。

如果适当地缓存数据(如#1.1开头所述)和良好的管道传输(而不是那些缓存),您将获得更多收益。

5)另外,我还需要一种在训练后为所有图层提取所有最终权重的方法,以便可以绘制它们并检查它们的分布。检查诸如梯度消失或爆炸之类的问题。

正如上面@Leevo所指出的,

weights = model.get_weights()
Run Code Online (Sandbox Code Playgroud)

会帮助您减肥。你可以将其转化为np.array使用积seabornmatplotlib,分析,支票或任何其他你想要的。

2.2放在一起

总而言之,您的main.py(或入口点或类似的东西)将包括以下内容(或多或少):

from dataset import ImageDatasetCreator
from model import Model
from train import train

# You could use argparse for things like batch, epochs etc.
if __name__ == "__main__":
    dataloader = ImageDatasetCreator("mnist", batch=64, cache=True)
    train, test = dataloader.get_train(), dataloader.get_test()
    model = Model()
    model.build((None, 28, 28, 1))
    train(
        model, train, path epochs, test, len(train) // batch, len(test) // batch, ...
    )  # provide necessary arguments appropriately
    # Do whatever you want with those
    weights = model.get_weights()
Run Code Online (Sandbox Code Playgroud)

哦,请记住,上面的功能不是复制粘贴的功能,应将其视为准则。如有任何疑问请打我。

3.评论问题

3.1如何初始化自定义和内置图层

3.1.1 TLDR您将要阅读的内容

  • 自定义Poisson初始化函数,但需要三个 参数
  • tf.keras.initalizationAPI需要两个参数(请参阅其文档的最后一点),因此一个是通过lambda我们之前编写的Python 内部自定义层 指定的
  • 添加了该层的可选偏压,可以使用布尔值将其关闭

为什么如此无用的复杂?为了说明这一点,tf2.0您最终可以使用Python的功能,不再需要麻烦的图形,if而不必tf.cond等。

3.1.2从TLDR到实施

Keras初始化,可以发现这里和Tensorflow的味道在这里

请注意API的不一致(大写字母,如类,小写字母,带有下划线,如函数),尤其是在中tf2.0,但这不是重点。

您可以通过传递字符串(如上YourDense所示)或在对象创建过程中使用它们。

为了允许在自定义层中进行自定义初始化,您可以简单地向构造函数添加其他参数(tf.keras.Model该类仍然是Python类,__init__应该与Python相同使用)。

在此之前,我将向您展示如何创建自定义初始化:

# Poisson custom initialization because why not.
def my_dumb_init(shape, lam, dtype=None):
    return tf.squeeze(tf.random.poisson(shape, lam, dtype=dtype))
Run Code Online (Sandbox Code Playgroud)

注意,它的签名带有三个参数,而(shape, dtype)只能带有三个参数。仍然可以在创建自己的图层时轻松地“修复”此问题,例如下面的内容(扩展YourLinear):

import typing

import tensorflow as tf


class YourDense(tf.keras.layers.Layer):
    # It's still Python, use it as Python, that's the point of tf.2.0
    @classmethod
    def register_initialization(cls, initializer):
        # Set defaults if init not provided by user
        if initializer is None:
            # let's make the signature proper for init in tf.keras
            return lambda shape, dtype: my_dumb_init(shape, 1, dtype)
        return initializer

    def __init__(
        self,
        units: int,
        bias: bool = True,
        # can be string or callable, some typing info added as well...
        kernel_initializer: typing.Union[str, typing.Callable] = None,
        bias_initializer: typing.Union[str, typing.Callable] = None,
    ):
        super().__init__()
        self.units: int = units
        self.kernel_initializer = YourDense.register_initialization(kernel_initializer)
        if bias:
            self.bias_initializer = YourDense.register_initialization(bias_initializer)
        else:
            self.bias_initializer = None

    def build(self, input_shape):
        # Simply pass your init here
        self.kernel = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer=self.kernel_initializer,
            trainable=True,
        )
        if self.bias_initializer is not None:
            self.bias = self.add_weight(
                shape=(self.units,), initializer=self.bias_initializer
            )
        else:
            self.bias = None

    def call(self, inputs):
        weights = tf.matmul(inputs, self.kernel)
        if self.bias is not None:
            return weights + self.bias
Run Code Online (Sandbox Code Playgroud)

我已添加my_dumb_initialization为默认值(如果用户不提供默认值),并使用bias参数使bias为可选。请注意if,只要不依赖数据,就可以自由使用。如果是(或者是依赖于tf.Tensor以某种方式),一要利用@tf.function装饰从而改变Python的流向它的tensorflow对应物(如iftf.cond)。

有关签名的更多信息,请参见此处,非常容易遵循。

如果要将上述初始化程序更改合并到模型中,则必须创建适当的对象。

... # Previous of code Model here
self.network = tf.keras.Sequential(
    [
        YourDense(100, bias=False, kernel_initializer="lecun_uniform"),
        tf.keras.layers.ReLU(),
        YourDense(10, bias_initializer=tf.initializers.Ones()),
    ]
)
... # and the same afterwards
Run Code Online (Sandbox Code Playgroud)

使用内置的tf.keras.layers.Dense图层,可以做到相同(参数名称不同,但是想法成立)。

3.2自动微分使用 tf.GradientTape

3.2.1简介

重点tf.GradientTape是允许用户使用普通Python控制流和变量相对于另一个变量的梯度计算。

示例取自此处,但分为多个部分:

def f(x, y):
  output = 1.0
  for i in range(y):
    if i > 1 and i < 5:
      output = tf.multiply(output, x)
  return output
Run Code Online (Sandbox Code Playgroud)

forif流控制语句的常规python函数

def grad(x, y):
  with tf.GradientTape() as t:
    t.watch(x)
    out = f(x, y)
  return t.gradient(out, x)
Run Code Online (Sandbox Code Playgroud)

使用渐变磁带,您可以记录所有操作Tensors(以及它们的中间状态)并向后“播放”(使用跟踪规则执行自动向后微分)。

每个Tensor内部tf.GradientTape()上下文管理器都会自动记录。如果某些Tensor不在范围内,请使用watch()上面看到的方法。

最后,output相对于的梯度x(返回输入)。

3.2.2与深度学习的联系

上面描述的是backpropagation算法。为网络中的每个节点(或每个层)计算(相对于)输出的梯度。然后,各种优化程序会使用这些梯度进行校正,因此重复进行。

让我们继续,并假设您已经设置了tf.keras.Model,优化程序实例tf.data.Dataset和损失函数。

可以定义一门Trainer课程来为我们进行培训。如有疑问,请阅读代码中的注释

class Trainer:
    def __init__(self, model, optimizer, loss_function):
        self.model = model
        self.loss_function = loss_function
        self.optimizer = optimizer
        # You could pass custom metrics in constructor
        # and adjust train_step and test_step accordingly
        self.train_loss = tf.keras.metrics.Mean(name="train_loss")
        self.test_loss = tf.keras.metrics.Mean(name="train_loss")

    def train_step(self, x, y):
        # Setup tape
        with tf.GradientTape() as tape:
            # Get current predictions of network
            y_pred = self.model(x)
            # Calculate loss generated by predictions
            loss = self.loss_function(y, y_pred)
        # Get gradients of loss w.r.t. EVERY trainable variable (iterable returned)
        gradients = tape.gradient(loss, self.model.trainable_variables)
        # Change trainable variable values according to gradient by applying optimizer policy
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        # Record loss of current step
        self.train_loss(loss)

    def train(self, dataset):
        # For N epochs iterate over dataset and perform train steps each time
        for x, y in dataset:
            self.train_step(x, y)

    def test_step(self, x, y):
        # Record test loss separately
        self.test_loss(self.loss_function(y, self.model(x)))

    def test(self, dataset):
        # Iterate over whole dataset
        for x, y in dataset:
            self.test_step(x, y)

    def __str__(self):
        # You need Python 3.7 with f-string support
        # Just return metrics
        return f"Loss: {self.train_loss.result()}, Test Loss: {self.test_loss.result()}"
Run Code Online (Sandbox Code Playgroud)

现在,您可以像下面这样简单地在代码中使用此类:

EPOCHS = 5

# model, optimizer, loss defined beforehand
trainer = Trainer(model, optimizer, loss)
for _ in range(EPOCHS):
    trainer.train(train_dataset) # Same for training and test datasets
    trainer.test(test_dataset)
    print(f"Epoch {epoch}: {trainer})")
Run Code Online (Sandbox Code Playgroud)

印刷品会告诉您每个时期的训练和测试损失。您可以根据需要将培训和测试混合使用(例如5个培训时期和1个测试),还可以添加其他指标等。

如果您想要非面向OOP的方法,请参见此处(IMO的可读性较差,但每种方法都是独立的)。


Ste*_*t_R 5

另外,如果我可以改善代码中的某些内容,请也让我知道。

像这样使用高级API。您只需几行代码就可以完成此操作,调试,阅读和推理以下内容要容易得多:

(x_train, y_train), (x_test, y_test) = tfds.load('mnist', split=['train', 'test'], 
                                                  batch_size=-1, as_supervised=True)

x_train = tf.cast(tf.reshape(x_train, shape=(x_train.shape[0], 784)), tf.float32)
x_test  = tf.cast(tf.reshape(x_test, shape=(x_test.shape[0], 784)), tf.float32)

model = tf.keras.models.Sequential([
  tf.keras.layers.Dense(512, activation='sigmoid'),
  tf.keras.layers.Dense(256, activation='sigmoid'),
  tf.keras.layers.Dense(10, activation='softmax')
])
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)
Run Code Online (Sandbox Code Playgroud)

  • @ stewart-r谢谢:)我只是想知道它的工作原理要好一点,所以这就是为什么我不使用keras的原因。这些高级api变得如此简单,以至于我经常忘记幕后实际发生的事情。 (6认同)