Question (Pet*_*etr, score 8) · tags: numpy, pytorch, onnx
I am learning the ONNX framework, which lets us deploy deep learning (and other) models to production.
However, I am missing one piece. I think the main reason for having such a framework is inference: for example, when we have a trained model and want to use it in a different venv where we cannot have PyTorch, the model should still be usable.
I have prepared a "from scratch" example here:
# Modules
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torchvision
import onnx
import onnxruntime
import matplotlib.pyplot as plt
import numpy as np
# %config Completer.use_jedi = False
# MNIST Example dataset
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(
        'data', train=True, download=True,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])),
    batch_size=800)
# Take data and labels "by hand"
inputs_batch, labels_batch = next(iter(train_loader))
# Simple Model
class CNN(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=10,
                               kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=16,
                               kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.fc1 = nn.Linear(16*7*7, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x
# Training setting
device = 'cpu'
batch_size = 64
learning_rate = 0.001
n_epochs = 10
# Dataset prep
dataset = TensorDataset(inputs_batch, labels_batch)
TRAIN_DF = DataLoader(dataset = dataset, batch_size = batch_size, shuffle = True)
# Model Init
model = CNN(in_channels=1, num_classes=10)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
# Training Loop
for epoch in range(n_epochs):
    for data, labels in TRAIN_DF:
        model.train()
        # Send data to device
        data = data.to(device)
        labels = labels.to(device)
        # data = data.reshape(data.shape[0], -1)
        # Forward
        pred = model(data)
        loss = F.cross_entropy(pred, labels)
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Check Accuracy
def check_accuracy(loader, model):
    num_correct = 0
    num_total = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            # x = x.reshape(x.shape[0], -1)
            scores = model(x)
            _, pred = scores.max(1)
            num_correct += (pred == y).sum()
            num_total += pred.size(0)
    print(f"Got {num_correct} / {num_total} with accuracy {float(num_correct)/float(num_total)*100: .2f}")
check_accuracy(TRAIN_DF, model)
# Inference with ONNX
# Create Artifical data of the same size
img_size = 28
dummy_data = torch.randn(1, img_size, img_size)
dummy_input = dummy_data.unsqueeze(0)  # add batch dimension -> shape (1, 1, 28, 28)
input_name = "input"
output_name = "output"
model_eval = model.eval()
torch.onnx.export(
    model_eval,
    dummy_input,
    "model_CNN.onnx",
    input_names=["input"],
    output_names=["output"],
)
# Take Random Image from Training Data
X_pred = data[4].unsqueeze(0)
# Convert the Tensor image to PURE numpy and pretend we are working in venv where we only have numpy - NO PYTORCH
X_pred_np = X_pred.numpy()
X_pred_np = np.array(X_pred_np)
IMG_Rando = np.random.rand(1, 1, 28, 28)
np.shape(X_pred_np) == np.shape(IMG_Rando)
ort_session = onnxruntime.InferenceSession(
    "model_CNN.onnx"
)
def to_numpy(tensor):
    return (
        tensor.detach().cpu().numpy()
        if tensor.requires_grad
        else tensor.cpu().numpy()
    )
# compute ONNX Runtime output prediction
# WORKS
# ort_inputs = {ort_session.get_inputs()[0].name: X_pred_np}
# DOES NOT WORK
ort_inputs = {ort_session.get_inputs()[0].name: IMG_Rando}
# WORKS
# ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(X_pred)}
ort_outs = ort_session.run(None, ort_inputs)
ort_outs
First, we create a simple model and train it on the MNIST dataset.
Then we export the trained model with ONNX. Now, when I want to use it to classify an image, it works with X_pred_np even though it is "pure" NumPy, which is what I want.
However, I suspect this particular case only works because the array was derived from a PyTorch tensor object, so "behind the scenes" it still carries PyTorch properties. When I try to run inference on the random "pure" NumPy object IMG_Rando, there seems to be a problem:
Unexpected input data type. Actual: (tensor(double)) , expected: (tensor(float))
It appears that a reference back to the PyTorch form is required. Is there a way to run the ONNX prediction using only a NumPy image, so that inference can happen in a separate venv with no PyTorch installed?
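To make the goal concrete, the PyTorch-free script I have in mind would look roughly like the sketch below; only numpy and onnxruntime are imported, and the scaling to [0, 1] mimics what torchvision's ToTensor did during training (the random array just stands in for a real image):

# Hypothetical PyTorch-free inference: only numpy + onnxruntime are needed.
import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession("model_CNN.onnx")
input_name = session.get_inputs()[0].name

# Stand-in for a real 28x28 grayscale image with pixel values in [0, 255].
image = np.random.randint(0, 256, size=(28, 28))
# Scale to [0, 1] like ToTensor and add batch/channel dimensions.
x = (image / 255.0).astype(np.float32).reshape(1, 1, 28, 28)

logits = session.run(None, {input_name: x})[0]   # shape (1, 10)
predicted_index = int(np.argmax(logits, axis=1)[0])
print(predicted_index)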
Secondly, does ONNX have any way of remembering the actual class names?
In this particular case the output index happens to coincide with the image's label. In an animal classifier, however, ONNX would not give us labels such as "DOG" and "CAT", only the index of the predicted label. We would have to run it through our own "prediction dictionary" so that we know the fifth label is associated with "cat", the sixth with "dog", and so on.
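By "prediction dictionary" I mean nothing more than a hand-maintained mapping from output index to class name, shipped alongside the .onnx file. Continuing the sketch above, with made-up labels:

# Hand-made index-to-label mapping (labels here are purely illustrative).
class_names = {0: "cat", 1: "dog", 2: "horse"}   # ...one entry per class

logits = session.run(None, {input_name: x})[0]
predicted_index = int(np.argmax(logits, axis=1)[0])
predicted_label = class_names.get(predicted_index, "unknown")
print(predicted_index, predicted_label)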
Accepted answer (Les*_*rel, score 10)
NumPy defaults to float64 while PyTorch defaults to float32. Cast the input to float32 before inference:
IMG_Rando = np.random.rand(1, 1, 28, 28).astype(np.float32)
double is short for the double-precision floating-point format, i.e. a floating-point representation on 64 bits, while float refers to a floating-point number on 32 bits.
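If it is unclear which side has the wrong type, the expected input type can be read directly off the runtime session. A quick check along these lines (reusing the names from the question; not part of the original answer) should confirm it:

print(IMG_Rando.dtype)                    # float64 (np.random.rand always returns float64)
print(ort_session.get_inputs()[0].type)   # 'tensor(float)', i.e. the model expects float32

ort_inputs = {ort_session.get_inputs()[0].name: IMG_Rando.astype(np.float32)}
ort_outs = ort_session.run(None, ort_inputs)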
Answer (小智, score 4)
As an improvement on the accepted answer, the idiomatic way to generate random numbers in NumPy is now to use a Generator. The advantage is that the array can be created with the correct dtype directly, instead of going through astype, a comparatively expensive operation that copies the array (as in the accepted answer). The improved solution therefore looks like this:
rng = np.random.default_rng() # set seed if desired
IMG_Rando = rng.random((1, 1, 28, 28), dtype=np.float32)