基于附近点密度的 matplotlib hexbins 中的十六进制大小

Eth*_*han 9 python matplotlib

我有以下代码产生下图

import numpy as np
np.random.seed(3)
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame()
df['X'] = list(np.random.randint(100, size=100)) + list(np.random.randint(30, size=100))
df['Y'] = list(np.random.randint(100, size=100)) + list(np.random.randint(30, size=100))

df['Bin'] = df.apply(lambda row: .1 if row['X'] < 30 and row['Y'] < 30 else .9, axis=1)

fig, ax = plt.subplots(figsize=(10,10))
plt.scatter(df['X'], df['Y'])
Run Code Online (Sandbox Code Playgroud)

分散

我使用 hexbins 绘制了数据,如下所示

from matplotlib import cm

fig, ax = plt.subplots(figsize=(10,10))
hexbin = ax.hexbin(df['X'], df['Y'], C=df['Bin'], gridsize=20, cmap= cm.get_cmap('RdYlBu_r'),edgecolors='black')
plt.show()
Run Code Online (Sandbox Code Playgroud)

六边形

我想根据六边形覆盖的区域中绘制的点的密度来更改六边形的大小。例如,左下角的六边形(点密集的地方)将比其他地方的六边形(点稀疏的地方)大。有没有办法做到这一点?

编辑:我尝试了这个解决方案,但我不知道如何根据 df['Bin'] 为十六进制着色,或者如何设置最小和最大十六进制大小。

from matplotlib.collections import PatchCollection
from matplotlib.path import Path
from matplotlib.patches import PathPatch
fig, ax = plt.subplots(figsize=(10,10))
hexbin = ax.hexbin(df['X'], df['Y'], C=df['Bins'], gridsize=20, cmap= cm.get_cmap('RdYlBu_r'),edgecolors='black')
def sized_hexbin(ax,hc):
    offsets = hc.get_offsets()
    orgpath = hc.get_paths()[0]
    verts = orgpath.vertices
    values = hc.get_array()
    ma = values.max()
    patches = []
    for offset,val in zip(offsets,values):
        v1 = verts*val/ma+offset
        path = Path(v1, orgpath.codes)
        patch = PathPatch(path)
        patches.append(patch)

    pc = PatchCollection(patches, cmap=cm.get_cmap('RdYlBu_r'), edgecolors='black')
    pc.set_array(values)
    ax.add_collection(pc)
    hc.remove()

sized_hexbin(ax,hexbin)
plt.show()
Run Code Online (Sandbox Code Playgroud)

提议的解决方案

los*_*tin 3

您可能需要花一些时间来理解颜色映射。

    import numpy as np
    np.random.seed(3)
    import pandas as pd
    import matplotlib.pyplot as plt
    from matplotlib.collections import PatchCollection
    from matplotlib.path import Path
    from matplotlib.patches import PathPatch
    df = pd.DataFrame()
    df['X'] = list(np.random.randint(100, size=100)) + list(np.random.randint(30, size=100))
    df['Y'] = list(np.random.randint(100, size=100)) + list(np.random.randint(30, size=100))

    df['Bin'] = df.apply(lambda row: .1 if row['X'] < 30 and row['Y'] < 30 else .9, axis=1)

    #fig, ((ax1, ax2)) = plt.subplots(1, 2, sharex=True, sharey=True)
    ax1 = plt.scatter(df['X'], df['Y'])

    fig,ax2 = plt.subplots(figsize=(10,10))
    hexbin = ax2.hexbin(df['X'], df['Y'], C=df['Bin'], gridsize=20,edgecolors='black',cmap= 'RdBu', reduce_C_function=np.bincount) #**

    def sized_hexbin(ax,hc):
        offsets = hc.get_offsets()
        orgpath = hc.get_paths()[0]
        verts = orgpath.vertices
        values = hc.get_array()
        ma = values.max()
        patches = []
        for offset,val in zip(offsets,values):
            v1 = verts*val/ma + offset
            path = Path(v1, orgpath.codes)
            patch = PathPatch(path)
            patches.append(patch)

        pc = PatchCollection(patches, cmap= 'RdBu', edgecolors='black')
        pc.set_array(values)

        ax.add_collection(pc)

        hc.remove()

    sized_hexbin(ax2,hexbin)
    cb = plt.colorbar(hexbin, ax=ax2)

    plt.show()

To plot the chart based on df['bins'] values - 

Need to change the reduce_C_function in #** marked line -

    hexbin = ax2.hexbin(df['X'], df['Y'], C=df['Bin'], gridsize=20,edgecolors='black',cmap= 'RdBu', reduce_C_function=np.sum)

[![enter image description here][2]][2]


  [1]: https://i.stack.imgur.com/kv0U4.png
  [2]: https://i.stack.imgur.com/mb0gD.png

# Another variation of the chart :

# Where size is based on count of points in the bins and color is based on values of the df['bin']./ Also added if condition to control minimum hexbin size.


import numpy as np
np.random.seed(3)
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.path import Path
from matplotlib.patches import PathPatch
from functools import partial

mycmp = 'coolwarm'

df = pd.DataFrame()
df['X'] = list(np.random.randint(100, size=100)) + list(np.random.randint(30, size=100))
df['Y'] = list(np.random.randint(100, size=100)) + list(np.random.randint(30, size=100))

df['Bin'] = df.apply(lambda row: .1 if row['X'] < 30 and row['Y'] < 30 else .9, axis=1)

#fig, ((ax1, ax2)) = plt.subplots(1, 2, sharex=True, sharey=True)
ax1 = plt.scatter(df['X'], df['Y'])


fig,ax2 = plt.subplots(figsize=(10,10))
hexbin = ax2.hexbin(df['X'], df['Y'], C=df['Bin'], gridsize=15,edgecolors='black',cmap= newcmp , reduce_C_function=np.bincount)
hexbin2 = ax2.hexbin(df['X'], df['Y'], C=df['Bin'], gridsize=15,edgecolors='black',cmap= newcmp , reduce_C_function=np.mean)

def sized_hexbin(ax,hc,hc2):
    offsets = hc.get_offsets()
    orgpath = hc.get_paths()[0]
    verts = orgpath.vertices
    values1 = hc.get_array()
    values2 = hc2.get_array()
    ma = values1.max()
    patches = []

    for offset,val in zip(offsets,values1):
        # Adding condition for minimum size 
        if (val/ma) < 0.2:
            val_t = 0.2
        else:
            val_t = val/ma
        v1 =  verts*val_t + offset
        path = Path(v1, orgpath.codes)
        print(path)
        patch = PathPatch(path)
        patches.append(patch)

    pc = PatchCollection(patches, cmap=  newcmp)  #edgecolors='black'
    pc.set_array(values2)

    ax.add_collection(pc)
    hc.remove()
    hc2.remove()


sized_hexbin(ax2,hexbin,hexbin2)
cb = plt.colorbar(hexbin2, ax=ax2)

plt.xlim((-5, 100))
plt.ylim((-5, 100))

plt.show()
Run Code Online (Sandbox Code Playgroud)

在此输入图像描述

在此输入图像描述

在此输入图像描述