SH_*_*ity 5 python classification keras tensorflow tsne
我正在尝试在 TensorFlow 中为一个图像分类任务实现 t-SNE 可视化。我在网上找到的示例大多是用 PyTorch 实现的。参见此处。
这是我用于训练目的的通用代码,它工作得很好,只是想向其中添加 t-SNE 可视化:
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
from tensorflow import keras
from tensorflow.keras import layers, Input
from tensorflow.keras.layers import Dense, InputLayer, Flatten
from tensorflow.keras.models import Sequential, Model
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
.
.
.
# ImageNet-pretrained ResNet152 backbone; include_top=False requests the
# network without its final classification layers.
base_model = tf.keras.applications.ResNet152(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
)
.
.
.
# Freeze the pretrained backbone so its weights are not updated in training.
base_model.trainable = False

# Create the new classification head on top of the frozen backbone.
# NOTE(review): Keras image inputs are conventionally (height, width, channels);
# confirm that (IMG_WIDTH, IMG_HEIGHT, 3) agrees with the generators' target_size.
inputs = tf.keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
# training=False runs the backbone in inference mode.
x = base_model(inputs, training=False)
x = layers.Flatten()(x)
x = layers.Dense(64)(x)
x = layers.Activation('relu')(x)
# The tensor is already flat at this point; this layer leaves its shape as is.
x = layers.Flatten()(x)
x = layers.Dense(32)(x)
x = layers.Activation('relu')(x)
# Two output units followed by softmax: one probability per class.
x = layers.Dense(2)(x)
outputs = layers.Activation('softmax')(x)
model = keras.Model(inputs, outputs)
# Augmentation settings used for the validation images.
# NOTE(review): random rotation/zoom/flips are usually reserved for training
# data — confirm that augmenting the validation set is intended.
vaidation_datagen = ImageDataGenerator(
    rotation_range=90,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# Training batches read from train_path. class_mode='categorical' produces
# one-hot label vectors (not binary 0/1 labels).
# NOTE(review): train_datagen is not defined in this excerpt.
train_generator = train_datagen.flow_from_directory(
    train_path,  # root directory of the training images
    target_size=target_size,  # all images will be resized to the target size
    color_mode='rgb',
    batch_size=batch_size,
    shuffle=True,
    class_mode='categorical',
    interpolation='nearest',
    seed=42,
)

# Validation batches read from validation_path with the same settings.
validation_generator = vaidation_datagen.flow_from_directory(
    validation_path,  # root directory of the validation images
    target_size=target_size,  # all images will be resized to the target size
    color_mode='rgb',
    batch_size=batch_size,
    shuffle=True,
    class_mode='categorical',
    interpolation='nearest',
    seed=42,
)
# Pass compile() arguments by keyword: the positional order after `loss`
# differs between Keras versions (Keras 3 puts `loss_weights` third), so a
# positional `metrics` can be silently misinterpreted.
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Save the model to model_path + model_filename whenever val_loss improves.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    model_path + model_filename,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
model.summary()

# `use_multiprocessing=False` is omitted: it is the default in TF 2.x and the
# argument no longer exists in Keras 3's Model.fit, where passing it fails.
history = model.fit(
    train_generator,
    steps_per_epoch=num_of_train_img_raw // batch_size,
    epochs=epochs,
    validation_data=validation_generator,  # relates to the validation data.
    validation_steps=num_of_val_img_raw // batch_size,
    callbacks=[model_checkpoint],
)
Run Code Online (Sandbox Code Playgroud)
根据提供的参考链接,我似乎需要先保存特征(features),然后再对这些特征应用 t-SNE,如下所示(这部分是从该链接复制并粘贴的):
# Reduce the feature vectors to two dimensions with t-SNE.
# assumes TSNE is sklearn.manifold.TSNE and `features` is a 2-D array of
# shape (n_samples, n_features) — TODO confirm
tsne = TSNE(n_components=2).fit_transform(features)
# scale and move the coordinates so they fit [0; 1] range
def scale_to_01_range(x):
    """Linearly rescale ``x`` so that its values span the [0, 1] range.

    Args:
        x: Array-like of numeric values (e.g. one t-SNE coordinate axis).

    Returns:
        A NumPy float array with the same shape as ``x`` in which the minimum
        maps to 0 and the maximum maps to 1. If every value is identical,
        an array of zeros is returned instead of dividing by zero.
    """
    x = np.asarray(x, dtype=float)
    # move the distribution so that it starts from zero
    # by subtracting the minimal value from all its values
    starts_from_zero = x - np.min(x)
    # compute the distribution range
    value_range = np.max(x) - np.min(x)
    if value_range == 0:
        # constant input: 0 / 0 would fill the result with NaNs
        return np.zeros_like(starts_from_zero)
    # make the distribution fit [0; 1] by dividing by its range
    return starts_from_zero / value_range
# extract x and y coordinates representing the positions of the images on
# the t-SNE plot, each rescaled to the [0, 1] range
tx = tsne[:, 0]
ty = tsne[:, 1]
tx = scale_to_01_range(tx)
ty = scale_to_01_range(ty)

# initialize a matplotlib plot
fig = plt.figure()
ax = fig.add_subplot(111)

# for every class, we'll add a scatter plot separately
for label in colors_per_class:
    # find the samples of the current class in the data
    indices = [i for i, sample_label in enumerate(labels) if sample_label == label]

    # extract the coordinates of the points of this class only
    current_tx = np.take(tx, indices)
    current_ty = np.take(ty, indices)

    # convert the class color from 0-255 to matplotlib's 0-1 format.
    # - builtin `float` is used because the `np.float` alias was removed in
    #   NumPy 1.24;
    # - the color is wrapped in a list to form a single-row 2-D array, so
    #   scatter() always reads it as one RGB color for all points rather
    #   than as per-point values when the class happens to have 3 samples.
    color = np.array([colors_per_class[label]], dtype=float) / 255

    # add a scatter plot with the corresponding color and label
    ax.scatter(current_tx, current_ty, c=color, label=label)

# build a legend using the labels we set previously
ax.legend(loc='best')

# finally, show the plot
plt.show()
Run Code Online (Sandbox Code Playgroud)
我将非常感谢您帮助连接这两部分。
您可以尝试如下操作:
训练你的模型
import tensorflow as tf
import pathlib
# Download and unpack the flower-photos example archive, keeping the
# resulting location as a pathlib.Path.
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = pathlib.Path(
    tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
)

# Build a batched image dataset from data_dir; images are resized to 180x180.
batch_size = 32
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    seed=123,
    image_size=(180, 180),
    batch_size=batch_size,
)
# Small CNN classifier: pixel rescaling, three Conv2D/MaxPooling2D stages,
# dropout, then a dense head that ends in 5 output units with no activation.
model = tf.keras.Sequential(
    [
        # scale pixel values by 1/255
        tf.keras.layers.Rescaling(1.0 / 255, input_shape=(180, 180, 3)),
        tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Flatten(),
        # layers[-2]: a 128-unit dense layer
        tf.keras.layers.Dense(128, activation='relu'),
        # layers[-1]: 5 output units, no activation
        tf.keras.layers.Dense(5),
    ]
)
# Labels are integer class indices and the loss is told to expect raw
# logits (from_logits=True) rather than probabilities.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for a fixed number of passes over train_ds.
epochs = 10
history = model.fit(train_ds, epochs=epochs)
Run Code Online (Sandbox Code Playgroud)
对模型的最后一层和倒数第二层进行预测并可视化
from sklearn.manifold import TSNE
import numpy as np
from matplotlib import pyplot as plt
# Sub-model sharing `model`'s input but stopping at its second-to-last layer;
# its outputs are the feature vectors that get visualised.
model2 = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output)

# get five batches of images (labels dropped) and convert to one numpy array
test_ds = np.concatenate(
    list(train_ds.take(5).map(lambda images, _labels: images))
)

features = model2(test_ds)
# Class index with the highest model output for each image. These are the
# model's predictions, not the ground-truth labels.
labels = np.argmax(model(test_ds), axis=-1)

# Project the feature vectors down to two dimensions.
tsne = TSNE(n_components=2).fit_transform(features)
def scale_to_01_range(x):
    """Linearly rescale ``x`` so that its values span the [0, 1] range.

    Args:
        x: Array-like of numeric values (e.g. one t-SNE coordinate axis).

    Returns:
        A NumPy float array with the same shape as ``x`` in which the minimum
        maps to 0 and the maximum maps to 1. If every value is identical,
        an array of zeros is returned instead of dividing by zero.
    """
    x = np.asarray(x, dtype=float)
    starts_from_zero = x - np.min(x)
    value_range = np.max(x) - np.min(x)
    if value_range == 0:
        # constant input: 0 / 0 would fill the result with NaNs
        return np.zeros_like(starts_from_zero)
    return starts_from_zero / value_range
# Split the 2-D embedding into its axes and rescale each to [0, 1].
tx = scale_to_01_range(tsne[:, 0])
ty = scale_to_01_range(tsne[:, 1])

# One plotting color per class; trim or extend this list to match the
# number of classes in the dataset.
colors = ['red', 'blue', 'green', 'brown', 'yellow']
classes = train_ds.class_names
print(classes)

fig = plt.figure()
ax = fig.add_subplot(111)

# Draw one scatter series per class so each gets its own legend entry.
# Pairing with zip() avoids an IndexError when `colors` has more entries
# than there are classes.
for idx, (class_name, c) in enumerate(zip(classes, colors)):
    # positions of the samples whose label equals this class index
    indices = np.flatnonzero(np.asarray(labels) == idx)
    current_tx = np.take(tx, indices)
    current_ty = np.take(ty, indices)
    ax.scatter(current_tx, current_ty, c=c, label=class_name)

ax.legend(loc='best')
plt.show()
Run Code Online (Sandbox Code Playgroud)
`model2` 输出您想要可视化的特征,而 `model` 借助 `np.argmax` 输出预测的类别。此外,此示例使用具有 5 个类别的数据集,这就是有 5 种不同颜色的原因。就您而言,您只有 2 个类别,因此只需要 2 种颜色。