Pic*_*Man 3 python csv matplotlib pandas
import pandas as pd
from scipy import misc
import numpy as np
import matplotlib.pyplot as plt
# Load the picture and keep the pixel array in a one-row DataFrame, column "img".
# NOTE(review): scipy.misc.imread is not available in recent SciPy releases — confirm
# the installed version still provides it.
W = {'img': [misc.imread('pic.jpg')]}
df = pd.DataFrame(W)
# This displays the image (the column is named "img", so access it by that name)
plt.imshow(df.img[0])
plt.show()
# Round-trip the frame through a CSV file.
df.to_csv('mypic.csv')
new_df = pd.read_csv('mypic.csv')
# This does not display the image: the cell read back from the CSV file is
# presumably text, not the original pixel array.
plt.imshow(new_df.img[0])
plt.show()
Run Code Online (Sandbox Code Playgroud)
当我尝试将图像显示为由csv文件加载时,我得到错误:图像数据无法转换为浮点数.但是,我在使用数据帧时能够正确显示图像df.
当我将df存储到csv文件时,我怀疑数据类型出了问题.我该如何解决这个问题?
编辑:我应该补充一点,我的主要目标是
Har*_*vey 12
如果您有包含 URL 或本地路径的 Pandas 列,您可以生成将显示缩略图或任何其他图像大小的图像列。
1. 如果您在列表中有图像的 URL。
您首先需要根据图像 URL 下载图像。adImageList包含要作为列添加到 Pandas 的图像 URL 列表。
import os
import urllib.request

dir_base = os.getcwd()  # Directory the images are downloaded into

# Download every URL and remember where each file was stored.
local_paths = []
for i, URL in enumerate(adImageList, start=1):
    # Sequential, zero-prefixed file name, for example 01_image.jpg
    image_name = '0{}_image.jpg'.format(i)
    local_path = os.path.join(dir_base, image_name)
    urllib.request.urlretrieve(URL, local_path)
    local_paths.append(local_path)

# Add the locally fetched image paths as a new column.
# Assumes adImageList holds exactly one URL per row of df — TODO confirm.
df['local_image_path'] = local_paths
Run Code Online (Sandbox Code Playgroud)
2. 如果您在 Pandas 数据框中的单独列中有图像 URL。 首先创建用于获取单个图像的本地 URL 的函数
import itertools
import os
import urllib.request

# Running number that gives every downloaded image a distinct file name.
_image_counter = itertools.count(1)


def get_image_local(URL):
    """Download the image at *URL* and return the path of the local copy.

    The file is stored in the current working directory under a sequential,
    zero-prefixed name (``01_image.jpg``, ``02_image.jpg``, ...).

    Args:
        URL: Address of the image to download.

    Returns:
        The absolute path of the downloaded file as a string.
    """
    image_name = '0{}_image.jpg'.format(next(_image_counter))
    local_path_image = os.path.join(os.getcwd(), image_name)
    urllib.request.urlretrieve(URL, local_path_image)
    return local_path_image
Run Code Online (Sandbox Code Playgroud)
然后使用 lambda 表达式将其映射到新列 imageLocal:
# Download each row's URL and store the resulting local file path.
df['imageLocal'] = df['URL'].map(get_image_local)
Run Code Online (Sandbox Code Playgroud)
df['imageLocal'] 应该是这样的:
Run Code Online (Sandbox Code Playgroud)
0 C:\Users\username\Documents\Base_folder\01_image.jpg
1 C:\Users\username\Documents\Base_folder\02_image.jpg
2 C:\Users\username\Documents\Base_folder\03_image.jpg
3. 使用 PIL 函数,您现在只需复制粘贴以下代码即可:
import glob
import random
import base64
import pandas as pd
from PIL import Image
from io import BytesIO
from IPython.display import HTML
import io
# None removes the column-width limit so long cell contents (such as embedded
# base64 <img> tags) are not truncated; the former -1 is rejected by recent pandas.
pd.set_option('display.max_colwidth', None)
def get_thumbnail(path):
    """Open the image file at *path* and return it as a PIL image.

    On Windows the path is made absolute and given the ``\\\\?\\`` prefix, which
    is used to prevent problems with long Windows paths. Paths that already
    start with two backslashes (already prefixed, or UNC shares) and paths on
    other operating systems are passed through unchanged.

    Args:
        path: Location of the image file as a string.

    Returns:
        The image object returned by ``Image.open``.
    """
    import os

    if os.name == "nt":
        absolute = os.path.abspath(path)
        # The prefix is only valid in front of an absolute drive path.
        if not absolute.startswith("\\\\"):
            path = "\\\\?\\" + absolute
    return Image.open(path)
def image_base64(im):
    """Return *im* encoded as JPEG and wrapped in base64 text.

    Args:
        im: Either a PIL image or a string path; a path is first opened
            through ``get_thumbnail``.

    Returns:
        The base64 representation of the JPEG bytes as a ``str``.
    """
    if isinstance(im, str):
        im = get_thumbnail(im)
    # JPEG has no alpha channel or palette, so such images (typically loaded
    # from PNG/GIF files) must be converted before they can be saved.
    if im.mode in ("RGBA", "LA", "P", "PA"):
        im = im.convert("RGB")
    with BytesIO() as buffer:
        im.save(buffer, "jpeg")
        return base64.b64encode(buffer.getvalue()).decode()
def image_formatter(im):
    """Build an HTML ``<img>`` tag that embeds *im* as a base64 JPEG data URI."""
    encoded = image_base64(im)
    return '<img src="data:image/jpeg;base64,' + encoded + '">'
Run Code Online (Sandbox Code Playgroud)
我们可以像下面这样将本地图像路径传递给 get_thumbnail(path):
# Open every local image file and keep the resulting PIL object in a new column.
df['imagePILL'] = df['imageLocal'].map(get_thumbnail)
Run Code Online (Sandbox Code Playgroud)
而且df['imagePILL']应该是这样的:
Run Code Online (Sandbox Code Playgroud)
0 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=300x400 at 0x265BA323240>
1 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=200x150 at 0x265BA3231D0>
2 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=300x400 at 0x265BA3238D0>
您可以利用 Pandas 数据框将新列置于所需位置:
# Rearrange the columns so that they appear in sorted order of their labels.
df= df.reindex(sorted(df.columns), axis=1)
Run Code Online (Sandbox Code Playgroud)
现在,如果您想查看带有图像的 Pandas 数据框,只需在 IPython.display 的 HTML 函数中调用 image_formatter 函数:
# Render the frame as HTML, passing the 'imagePILL' column through image_formatter;
# escape=False keeps the generated <img> tags as markup instead of escaping them.
HTML(df.to_html(formatters={'imagePILL': image_formatter}, escape=False))
Run Code Online (Sandbox Code Playgroud)
您可以使用任何其他方式来显示 HTML,重要的是在 pandas 数据框中获取 PIL 对象。
从问题中不清楚为什么要使用pandas数据帧来存储图像.我认为这会使事情变得不必要地复杂化.您可以直接以二进制格式直接存储numpy数组,稍后再次加载它.
import numpy as np
import matplotlib.pyplot as plt
# Build a tiny test picture and write it to disk as a JPEG file.
pixels = np.array([[[1., 0.], [0., 0.]],
                   [[0., 1.], [0., 1.]],
                   [[0., 0.], [1., 1.]]]).T
plt.imsave('pic.jpg', pixels)

# Load the picture back from the JPEG file and display it.
image = plt.imread('pic.jpg')
plt.imshow(image)
plt.show()

# Persist the pixel array in NumPy's binary format (stored as 'mypic.npy').
np.save('mypic', image)

# Restore the array from the binary file and display it once more.
restored = np.load('mypic.npy')
plt.imshow(restored)
plt.show()
Run Code Online (Sandbox Code Playgroud)
作为对下面评论的回应(这些评论在某种程度上把问题引向了不同的方向),您当然可以在数据框中存储图像的路径/名称。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Create an image and write it to disk.
imar = np.array([[[1., 0.], [0., 0.]],
                 [[0., 1.], [0., 1.]],
                 [[0., 0.], [1., 1.]]]).transpose()
plt.imsave('pic.jpg', imar)
# Create the dataframe; "Feature2" will hold the image file name.
df = pd.DataFrame([[0, ""]], columns=["Feature1", "Feature2"])
# Read the image and show it.
im = plt.imread('pic.jpg')
plt.imshow(im)
plt.show()
# Save the image array to a binary file.
np.save('mypic.npy', im)
# Store the name of the image file in the dataframe.
df.iloc[0, 1] = 'mypic.npy'
# Save the dataframe; index=False keeps the row index out of the file so that
# no extra "Unnamed: 0" column appears when it is read back.
df.to_csv("mydf.csv", index=False)
del df
# Read the dataframe from csv.
df = pd.read_csv("mydf.csv")
# Load the image from the binary file, given the path stored in the dataframe.
new_im = np.load(df["Feature2"][0])
# Show the loaded image.
plt.imshow(new_im)
plt.show()
Run Code Online (Sandbox Code Playgroud)
最后,您可以按照最初的计划将实际图像存储在数据框单元格中,但不是写入 csv,而是用 pickle 序列化整个数据框,这样读取回来时就像从未保存过一样。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Create an image and write it to disk.
imar = np.array([[[1., 0.], [0., 0.]],
                 [[0., 1.], [0., 1.]],
                 [[0., 0.], [1., 1.]]]).transpose()
plt.imsave('pic.jpg', imar)
# Create the dataframe; "Feature2" will hold the image itself.
df = pd.DataFrame([[0, ""]], columns=["Feature1", "Feature2"])
# Read the image and show it.
im = plt.imread('pic.jpg')
plt.imshow(im)
plt.show()
# Store the image itself (wrapped in a list) in the dataframe cell.
df.iloc[0, 1] = [im]
# Save the dataframe; open() replaces the Python 2 only file() builtin and the
# with-block closes the handle once the data is written.
with open("mydf.pickle", "wb") as pickle_file:
    pickle.dump(df, pickle_file)
del df
# Read the dataframe from pickle.
# NOTE: only unpickle files you created yourself; pickle is unsafe on untrusted data.
with open("mydf.pickle", "rb") as pickle_file:
    df = pickle.load(pickle_file)
# Show the loaded image from the dataframe cell (first element of the stored list).
plt.imshow(df["Feature2"][0][0])
plt.show()
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
13787 次 |
| 最近记录: |