O.r*_*rka 3 python arrays optimization numpy array-broadcasting
我正在尝试利用NumPy 广播和后端数组计算来显着加快此功能。不幸的是,它不能很好地扩展,所以我希望大大提高它的性能。现在代码没有正确利用广播进行计算。
我使用WGCNA 的 bicor 函数作为黄金标准,因为这是我目前所知道的最快的实现。Python 版本输出与 R 函数相同的结果。
# ==============================================================================
# Imports
# ==============================================================================
# Built-ins
import os, sys, time, multiprocessing
# 3rd party
import numpy as np
import pandas as pd
# ==============================================================================
# R Imports
# ==============================================================================
from rpy2 import robjects, rinterface
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
R = robjects.r
NULL = robjects.rinterface.NULL
rinterface.set_writeconsole_regular(None)
WGCNA = importr("WGCNA")
# Python
def _biweight_midcorrelation(a, b):
a_median = np.median(a)
b_median = np.median(b)
# Median absolute deviation
a_mad = np.median(np.abs(a - a_median))
b_mad = np.median(np.abs(b - b_median))
u = (a - a_median) / (9 * a_mad)
v = (b - b_median) / (9 * b_mad)
w_a = np.square(1 - np.square(u)) * ((1 - np.abs(u)) > 0)
w_b = np.square(1 - np.square(v)) * ((1 - np.abs(v)) > 0)
a_item = (a - a_median) * w_a
b_item = (b - b_median) * w_b
return (a_item * b_item).sum() / (
np.sqrt(np.square(a_item).sum()) *
np.sqrt(np.square(b_item).sum()))
def biweight_midcorrelation(X):
return X.corr(method=_biweight_midcorrelation)
# # OLD IMPLEMENTATION
# def biweight_midcorrelation(X):
# median = X.median()
# mad = (X - median).abs().median()
# U = (X - median) / (9 * mad)
# adjacency = np.square(1 - np.square(U)) * ((1 - U.abs()) > 0)
# estimator = (X - median) * adjacency
# bicor_matrix = np.empty((X.shape[1], X.shape[1]), dtype=float)
# for i, ac in enumerate(estimator):
# for j, bc in enumerate(estimator):
# a = estimator[ac]
# b = estimator[bc]
# c = (a * b).sum() / (
# np.sqrt(np.square(a).sum()) * np.sqrt(np.square(b).sum()))
# bicor_matrix[i, j] = c
# bicor_matrix[j, i] = c
# return pd.DataFrame(bicor_matrix, index=X.columns, columns=X.columns)
# R
def biweight_midcorrelation_r_wrapper(X, n_jobs=-1, r_package=None):
"""
WGCNA: bicor
function (x, y = NULL, robustX = TRUE, robustY = TRUE, use = "all.obs",
maxPOutliers = 1, qu <...> dian absolute deviation, or zero variance."))
"""
if r_package is None:
r_package = importr("WGCNA")
if n_jobs == -1:
n_jobs = multiprocessing.cpu_count()
labels = X.columns
r_df_sim = r_package.bicor(pandas2ri.py2ri(X), nThreads=n_jobs)
df_bicor = pd.DataFrame(pandas2ri.ri2py(r_df_sim), index=labels, columns=labels)
return df_bicor
# X.shape = (150,4)
X = pd.DataFrame({'sepal_length': {'iris_0': 5.1, 'iris_1': 4.9, 'iris_2': 4.7, 'iris_3': 4.6, 'iris_4': 5.0, 'iris_5': 5.4, 'iris_6': 4.6, 'iris_7': 5.0, 'iris_8': 4.4, 'iris_9': 4.9, 'iris_10': 5.4, 'iris_11': 4.8, 'iris_12': 4.8, 'iris_13': 4.3, 'iris_14': 5.8, 'iris_15': 5.7, 'iris_16': 5.4, 'iris_17': 5.1, 'iris_18': 5.7, 'iris_19': 5.1, 'iris_20': 5.4, 'iris_21': 5.1, 'iris_22': 4.6, 'iris_23': 5.1, 'iris_24': 4.8, 'iris_25': 5.0, 'iris_26': 5.0, 'iris_27': 5.2, 'iris_28': 5.2, 'iris_29': 4.7, 'iris_30': 4.8, 'iris_31': 5.4, 'iris_32': 5.2, 'iris_33': 5.5, 'iris_34': 4.9, 'iris_35': 5.0, 'iris_36': 5.5, 'iris_37': 4.9, 'iris_38': 4.4, 'iris_39': 5.1, 'iris_40': 5.0, 'iris_41': 4.5, 'iris_42': 4.4, 'iris_43': 5.0, 'iris_44': 5.1, 'iris_45': 4.8, 'iris_46': 5.1, 'iris_47': 4.6, 'iris_48': 5.3, 'iris_49': 5.0, 'iris_50': 7.0, 'iris_51': 6.4, 'iris_52': 6.9, 'iris_53': 5.5, 'iris_54': 6.5, 'iris_55': 5.7, 'iris_56': 6.3, 'iris_57': 4.9, 'iris_58': 6.6, 'iris_59': 5.2, 'iris_60': 5.0, 'iris_61': 5.9, 'iris_62': 6.0, 'iris_63': 6.1, 'iris_64': 5.6, 'iris_65': 6.7, 'iris_66': 5.6, 'iris_67': 5.8, 'iris_68': 6.2, 'iris_69': 5.6, 'iris_70': 5.9, 'iris_71': 6.1, 'iris_72': 6.3, 'iris_73': 6.1, 'iris_74': 6.4, 'iris_75': 6.6, 'iris_76': 6.8, 'iris_77': 6.7, 'iris_78': 6.0, 'iris_79': 5.7, 'iris_80': 5.5, 'iris_81': 5.5, 'iris_82': 5.8, 'iris_83': 6.0, 'iris_84': 5.4, 'iris_85': 6.0, 'iris_86': 6.7, 'iris_87': 6.3, 'iris_88': 5.6, 'iris_89': 5.5, 'iris_90': 5.5, 'iris_91': 6.1, 'iris_92': 5.8, 'iris_93': 5.0, 'iris_94': 5.6, 'iris_95': 5.7, 'iris_96': 5.7, 'iris_97': 6.2, 'iris_98': 5.1, 'iris_99': 5.7, 'iris_100': 6.3, 'iris_101': 5.8, 'iris_102': 7.1, 'iris_103': 6.3, 'iris_104': 6.5, 'iris_105': 7.6, 'iris_106': 4.9, 'iris_107': 7.3, 'iris_108': 6.7, 'iris_109': 7.2, 'iris_110': 6.5, 'iris_111': 6.4, 'iris_112': 6.8, 'iris_113': 5.7, 'iris_114': 5.8, 'iris_115': 6.4, 'iris_116': 6.5, 'iris_117': 7.7, 'iris_118': 7.7, 'iris_119': 6.0, 'iris_120': 6.9, 'iris_121': 5.6, 'iris_122': 7.7, 'iris_123': 6.3, 'iris_124': 6.7, 'iris_125': 7.2, 'iris_126': 6.2, 'iris_127': 6.1, 'iris_128': 6.4, 'iris_129': 7.2, 'iris_130': 7.4, 'iris_131': 7.9, 'iris_132': 6.4, 'iris_133': 6.3, 'iris_134': 6.1, 'iris_135': 7.7, 'iris_136': 6.3, 'iris_137': 6.4, 'iris_138': 6.0, 'iris_139': 6.9, 'iris_140': 6.7, 'iris_141': 6.9, 'iris_142': 5.8, 'iris_143': 6.8, 'iris_144': 6.7, 'iris_145': 6.7, 'iris_146': 6.3, 'iris_147': 6.5, 'iris_148': 6.2, 'iris_149': 5.9}, 'sepal_width': {'iris_0': 3.5, 'iris_1': 3.0, 'iris_2': 3.2, 'iris_3': 3.1, 'iris_4': 3.6, 'iris_5': 3.9, 'iris_6': 3.4, 'iris_7': 3.4, 'iris_8': 2.9, 'iris_9': 3.1, 'iris_10': 3.7, 'iris_11': 3.4, 'iris_12': 3.0, 'iris_13': 3.0, 'iris_14': 4.0, 'iris_15': 4.4, 'iris_16': 3.9, 'iris_17': 3.5, 'iris_18': 3.8, 'iris_19': 3.8, 'iris_20': 3.4, 'iris_21': 3.7, 'iris_22': 3.6, 'iris_23': 3.3, 'iris_24': 3.4, 'iris_25': 3.0, 'iris_26': 3.4, 'iris_27': 3.5, 'iris_28': 3.4, 'iris_29': 3.2, 'iris_30': 3.1, 'iris_31': 3.4, 'iris_32': 4.1, 'iris_33': 4.2, 'iris_34': 3.1, 'iris_35': 3.2, 'iris_36': 3.5, 'iris_37': 3.6, 'iris_38': 3.0, 'iris_39': 3.4, 'iris_40': 3.5, 'iris_41': 2.3, 'iris_42': 3.2, 'iris_43': 3.5, 'iris_44': 3.8, 'iris_45': 3.0, 'iris_46': 3.8, 'iris_47': 3.2, 'iris_48': 3.7, 'iris_49': 3.3, 'iris_50': 3.2, 'iris_51': 3.2, 'iris_52': 3.1, 'iris_53': 2.3, 'iris_54': 2.8, 'iris_55': 2.8, 'iris_56': 3.3, 'iris_57': 2.4, 'iris_58': 2.9, 'iris_59': 2.7, 'iris_60': 2.0, 'iris_61': 3.0, 'iris_62': 2.2, 'iris_63': 2.9, 'iris_64': 2.9, 'iris_65': 3.1, 'iris_66': 3.0, 'iris_67': 2.7, 'iris_68': 2.2, 'iris_69': 2.5, 'iris_70': 3.2, 'iris_71': 2.8, 'iris_72': 2.5, 'iris_73': 2.8, 'iris_74': 2.9, 'iris_75': 3.0, 'iris_76': 2.8, 'iris_77': 3.0, 'iris_78': 2.9, 'iris_79': 2.6, 'iris_80': 2.4, 'iris_81': 2.4, 'iris_82': 2.7, 'iris_83': 2.7, 'iris_84': 3.0, 'iris_85': 3.4, 'iris_86': 3.1, 'iris_87': 2.3, 'iris_88': 3.0, 'iris_89': 2.5, 'iris_90': 2.6, 'iris_91': 3.0, 'iris_92': 2.6, 'iris_93': 2.3, 'iris_94': 2.7, 'iris_95': 3.0, 'iris_96': 2.9, 'iris_97': 2.9, 'iris_98': 2.5, 'iris_99': 2.8, 'iris_100': 3.3, 'iris_101': 2.7, 'iris_102': 3.0, 'iris_103': 2.9, 'iris_104': 3.0, 'iris_105': 3.0, 'iris_106': 2.5, 'iris_107': 2.9, 'iris_108': 2.5, 'iris_109': 3.6, 'iris_110': 3.2, 'iris_111': 2.7, 'iris_112': 3.0, 'iris_113': 2.5, 'iris_114': 2.8, 'iris_115': 3.2, 'iris_116': 3.0, 'iris_117': 3.8, 'iris_118': 2.6, 'iris_119': 2.2, 'iris_120': 3.2, 'iris_121': 2.8, 'iris_122': 2.8, 'iris_123': 2.7, 'iris_124': 3.3, 'iris_125': 3.2, 'iris_126': 2.8, 'iris_127': 3.0, 'iris_128': 2.8, 'iris_129': 3.0, 'iris_130': 2.8, 'iris_131': 3.8, 'iris_132': 2.8, 'iris_133': 2.8, 'iris_134': 2.6, 'iris_135': 3.0, 'iris_136': 3.4, 'iris_137': 3.1, 'iris_138': 3.0, 'iris_139': 3.1, 'iris_140': 3.1, 'iris_141': 3.1, 'iris_142': 2.7, 'iris_143': 3.2, 'iris_144': 3.3, 'iris_145': 3.0, 'iris_146': 2.5, 'iris_147': 3.0, 'iris_148': 3.4, 'iris_149': 3.0}, 'petal_length': {'iris_0': 1.4, 'iris_1': 1.4, 'iris_2': 1.3, 'iris_3': 1.5, 'iris_4': 1.4, 'iris_5': 1.7, 'iris_6': 1.4, 'iris_7': 1.5, 'iris_8': 1.4, 'iris_9': 1.5, 'iris_10': 1.5, 'iris_11': 1.6, 'iris_12': 1.4, 'iris_13': 1.1, 'iris_14': 1.2, 'iris_15': 1.5, 'iris_16': 1.3, 'iris_17': 1.4, 'iris_18': 1.7, 'iris_19': 1.5, 'iris_20': 1.7, 'iris_21': 1.5, 'iris_22': 1.0, 'iris_23': 1.7, 'iris_24': 1.9, 'iris_25': 1.6, 'iris_26': 1.6, 'iris_27': 1.5, 'iris_28': 1.4, 'iris_29': 1.6, 'iris_30': 1.6, 'iris_31': 1.5, 'iris_32': 1.5, 'iris_33': 1.4, 'iris_34': 1.5, 'iris_35': 1.2, 'iris_36': 1.3, 'iris_37': 1.4, 'iris_38': 1.3, 'iris_39': 1.5, 'iris_40': 1.3, 'iris_41': 1.3, 'iris_42': 1.3, 'iris_43': 1.6, 'iris_44': 1.9, 'iris_45': 1.4, 'iris_46': 1.6, 'iris_47': 1.4, 'iris_48': 1.5, 'iris_49': 1.4, 'iris_50': 4.7, 'iris_51': 4.5, 'iris_52': 4.9, 'iris_53': 4.0, 'iris_54': 4.6, 'iris_55': 4.5, 'iris_56': 4.7, 'iris_57': 3.3, 'iris_58': 4.6, 'iris_59': 3.9, 'iris_60': 3.5, 'iris_61': 4.2, 'iris_62': 4.0, 'iris_63': 4.7, 'iris_64': 3.6, 'iris_65': 4.4, 'iris_66': 4.5, 'iris_67': 4.1, 'iris_68': 4.5, 'iris_69': 3.9, 'iris_70': 4.8, 'iris_71': 4.0, 'iris_72': 4.9, 'iris_73': 4.7, 'iris_74': 4.3, 'iris_75': 4.4, 'iris_76': 4.8, 'iris_77': 5.0, 'iris_78': 4.5, 'iris_79': 3.5, 'iris_80': 3.8, 'iris_81': 3.7, 'iris_82': 3.9, 'iris_83': 5.1, 'iris_84': 4.5, 'iris_85': 4.5, 'iris_86': 4.7, 'iris_87': 4.4, 'iris_88': 4.1, 'iris_89': 4.0, 'iris_90': 4.4, 'iris_91': 4.6, 'iris_92': 4.0, 'iris_93': 3.3, 'iris_94': 4.2, 'iris_95': 4.2, 'iris_96': 4.2, 'iris_97': 4.3, 'iris_98': 3.0, 'iris_99': 4.1, 'iris_100': 6.0, 'iris_101': 5.1, 'iris_102': 5.9, 'iris_103': 5.6, 'iris_104': 5.8, 'iris_105': 6.6, 'iris_106': 4.5, 'iris_107': 6.3, 'iris_108': 5.8, 'iris_109': 6.1, 'iris_110': 5.1, 'iris_111': 5.3, 'iris_112': 5.5, 'iris_113': 5.0, 'iris_114': 5.1, 'iris_115': 5.3, 'iris_116': 5.5, 'iris_117': 6.7, 'iris_118': 6.9, 'iris_119': 5.0, 'iris_120': 5.7, 'iris_121': 4.9, 'iris_122': 6.7, 'iris_123': 4.9, 'iris_124': 5.7, 'iris_125': 6.0, 'iris_126': 4.8, 'iris_127': 4.9, 'iris_128': 5.6, 'iris_129': 5.8, 'iris_130': 6.1, 'iris_131': 6.4, 'iris_132': 5.6, 'iris_133': 5.1, 'iris_134': 5.6, 'iris_135': 6.1, 'iris_136': 5.6, 'iris_137': 5.5, 'iris_138': 4.8, 'iris_139': 5.4, 'iris_140': 5.6, 'iris_141': 5.1, 'iris_142': 5.1, 'iris_143': 5.9, 'iris_144': 5.7, 'iris_145': 5.2, 'iris_146': 5.0, 'iris_147': 5.2, 'iris_148': 5.4, 'iris_149': 5.1}, 'petal_width': {'iris_0': 0.2, 'iris_1': 0.2, 'iris_2': 0.2, 'iris_3': 0.2, 'iris_4': 0.2, 'iris_5': 0.4, 'iris_6': 0.3, 'iris_7': 0.2, 'iris_8': 0.2, 'iris_9': 0.1, 'iris_10': 0.2, 'iris_11': 0.2, 'iris_12': 0.1, 'iris_13': 0.1, 'iris_14': 0.2, 'iris_15': 0.4, 'iris_16': 0.4, 'iris_17': 0.3, 'iris_18': 0.3, 'iris_19': 0.3, 'iris_20': 0.2, 'iris_21': 0.4, 'iris_22': 0.2, 'iris_23': 0.5, 'iris_24': 0.2, 'iris_25': 0.2, 'iris_26': 0.4, 'iris_27': 0.2, 'iris_28': 0.2, 'iris_29': 0.2, 'iris_30': 0.2, 'iris_31': 0.4, 'iris_32': 0.1, 'iris_33': 0.2, 'iris_34': 0.2, 'iris_35': 0.2, 'iris_36': 0.2, 'iris_37': 0.1, 'iris_38': 0.2, 'iris_39': 0.2, 'iris_40': 0.3, 'iris_41': 0.3, 'iris_42': 0.2, 'iris_43': 0.6, 'iris_44': 0.4, 'iris_45': 0.3, 'iris_46': 0.2, 'iris_47': 0.2, 'iris_48': 0.2, 'iris_49': 0.2, 'iris_50': 1.4, 'iris_51': 1.5, 'iris_52': 1.5, 'iris_53': 1.3, 'iris_54': 1.5, 'iris_55': 1.3, 'iris_56': 1.6, 'iris_57': 1.0, 'iris_58': 1.3, 'iris_59': 1.4, 'iris_60': 1.0, 'iris_61': 1.5, 'iris_62': 1.0, 'iris_63': 1.4, 'iris_64': 1.3, 'iris_65': 1.4, 'iris_66': 1.5, 'iris_67': 1.0, 'iris_68': 1.5, 'iris_69': 1.1, 'iris_70': 1.8, 'iris_71': 1.3, 'iris_72': 1.5, 'iris_73': 1.2, 'iris_74': 1.3, 'iris_75': 1.4, 'iris_76': 1.4, 'iris_77': 1.7, 'iris_78': 1.5, 'iris_79': 1.0, 'iris_80': 1.1, 'iris_81': 1.0, 'iris_82': 1.2, 'iris_83': 1.6, 'iris_84': 1.5, 'iris_85': 1.6, 'iris_86': 1.5, 'iris_87': 1.3, 'iris_88': 1.3, 'iris_89': 1.3, 'iris_90': 1.2, 'iris_91': 1.4, 'iris_92': 1.2, 'iris_93': 1.0, 'iris_94': 1.3, 'iris_95': 1.2, 'iris_96': 1.3, 'iris_97': 1.3, 'iris_98': 1.1, 'iris_99': 1.3, 'iris_100': 2.5, 'iris_101': 1.9, 'iris_102': 2.1, 'iris_103': 1.8, 'iris_104': 2.2, 'iris_105': 2.1, 'iris_106': 1.7, 'iris_107': 1.8, 'iris_108': 1.8, 'iris_109': 2.5, 'iris_110': 2.0, 'iris_111': 1.9, 'iris_112': 2.1, 'iris_113': 2.0, 'iris_114': 2.4, 'iris_115': 2.3, 'iris_116': 1.8, 'iris_117': 2.2, 'iris_118': 2.3, 'iris_119': 1.5, 'iris_120': 2.3, 'iris_121': 2.0, 'iris_122': 2.0, 'iris_123': 1.8, 'iris_124': 2.1, 'iris_125': 1.8, 'iris_126': 1.8, 'iris_127': 1.8, 'iris_128': 2.1, 'iris_129': 1.6, 'iris_130': 1.9, 'iris_131': 2.0, 'iris_132': 2.2, 'iris_133': 1.5, 'iris_134': 1.4, 'iris_135': 2.3, 'iris_136': 2.4, 'iris_137': 1.8, 'iris_138': 1.8, 'iris_139': 2.1, 'iris_140': 2.4, 'iris_141': 2.3, 'iris_142': 1.9, 'iris_143': 2.3, 'iris_144': 2.5, 'iris_145': 2.3, 'iris_146': 1.9, 'iris_147': 2.0, 'iris_148': 2.3, 'iris_149': 1.8}})
# Python computation
start_time = time.time()
df_bicor__python = biweight_midcorrelation(X)
# R computation
df_bicor__r = biweight_midcorrelation_r_wrapper(X)
np.allclose(df_bicor__python, df_bicor__r)
Run Code Online (Sandbox Code Playgroud)
一个人可以写这个计算大约。快一个数量级(对于您指定的输入):
import numpy as np
def biweight_midcorrelation(arr):
n, m = arr.shape
arr = arr - np.median(arr, axis=0, keepdims=True)
v = 1 - (arr / (9 * np.median(np.abs(arr), axis=0, keepdims=True))) ** 2
arr = arr * v ** 2 * (v > 0)
norms = np.sqrt(np.sum(arr ** 2, axis=0))
return np.einsum('mi,mj->ij', arr, arr) / norms[:, None] / norms[None, :]
Run Code Online (Sandbox Code Playgroud)
通过以下方式桥接到 Pandas 数据框:
import pandas as pd
def corr_np2pd(df, func):
return pd.DataFrame(func(np.array(df)), index=df.columns, columns=df.columns)
Run Code Online (Sandbox Code Playgroud)
其用法是:
corr_df = corr_np2pd(df, biweight_midcorrelation)
Run Code Online (Sandbox Code Playgroud)
通过使用 Numba 实现最后一次计算,这可以做得更快。
我不太确定为什么您希望广播在当前代码中有所帮助。您可能是说矢量化吗?无论如何,我相信可以编写更快的代码,并且“旧”方法的矢量化版本会胜过您当前的方法。使用 Numba 可以更快地做到这一点。
有两种实用的方法可以解决您的问题:
pd.DataFrame.corr()在执行 (1) 时,如果不计算相关矩阵的不必要部分,则可能无法避免显式循环。
在执行 (2) 时,需要为每一对(对称)一维输入(2 * comb(n, 2)次)计算辅助值,而不是为每个一维输入(n次)计算一次辅助值. 例如,对于问题中指定的输入,需要执行n == 4预计算,但是,如果以成对方式完成,则该数字变为2 * comb(4, 2) == 12。
让我们看看如何在两种情况下推动性能。
让我们首先定义一个函数作为 Pandas 到 NumPy 的桥梁:
import numpy as np
import pandas as pd
def corr_np2pd(df, func):
return pd.DataFrame(func(np.array(df)), index=df.columns, columns=df.columns)
Run Code Online (Sandbox Code Playgroud)
现在在注释中的具有显式循环的函数属于这一类,它在下面报告为biweight_midcorrelation_pd_OP():
def biweight_midcorrelation_pd_OP(X):
median = X.median()
mad = (X - median).abs().median()
U = (X - median) / (9 * mad)
adjacency = np.square(1 - np.square(U)) * ((1 - U.abs()) > 0)
estimator = (X - median) * adjacency
bicor_matrix = np.empty((X.shape[1], X.shape[1]), dtype=float)
for i, ac in enumerate(estimator):
for j, bc in enumerate(estimator):
a = estimator[ac]
b = estimator[bc]
c = (a * b).sum() / (
np.sqrt(np.square(a).sum()) * np.sqrt(np.square(b).sum()))
bicor_matrix[i, j] = c
bicor_matrix[j, i] = c
return pd.DataFrame(bicor_matrix, index=X.columns, columns=X.columns)
Run Code Online (Sandbox Code Playgroud)
一个稍微修改的版本,其中计算完全在 NumPy 中完成并且应该与 一起使用corr_np2pd(),读取:
def biweight_midcorrelation_OP(arr):
n, m = arr.shape
med = np.median(arr, axis=0, keepdims=True)
mad = np.median(np.abs(arr - med), axis=0, keepdims=True)
u = (arr - med) / (9 * mad)
adj = ((1 - u ** 2) ** 2) * ((1 - np.abs(u)) > 0)
est = (arr - med) * adj
result = np.empty((m, m))
for i in range(m):
for j in range(m):
a = est[:, i]
b = est[:, j]
c = (a * b).sum() / (
np.sqrt(np.sum(a ** 2)) * np.sqrt(np.sum(b ** 2)))
result[i, j] = result[j, i] = c
return result
Run Code Online (Sandbox Code Playgroud)
现在,这有一些改进点:
最后一点可以通过两种方式改进:
biweight_midcorrelation_np()biweight_midcorrelation_npv()def biweight_midcorrelation_np(arr):
n, m = arr.shape
arr = arr - np.median(arr, axis=0, keepdims=True)
v = 1 - (arr / (9 * np.median(np.abs(arr), axis=0, keepdims=True))) ** 2
arr = arr * v ** 2 * (v > 0)
norms = np.sqrt(np.sum(arr ** 2, axis=0))
result = np.empty((m, m))
np.fill_diagonal(result, 1.0)
for i, j in zip(*np.triu_indices(m, 1)):
result[i, j] = result[j, i] = \
np.sum(arr[:, i] * arr[:, j]) / norms[i] / norms[j]
return result
Run Code Online (Sandbox Code Playgroud)
def biweight_midcorrelation_npv(arr):
n, m = arr.shape
arr = arr - np.median(arr, axis=0, keepdims=True)
v = 1 - (arr / (9 * np.median(np.abs(arr), axis=0, keepdims=True))) ** 2
arr = arr * v ** 2 * (v > 0)
norms = np.sqrt(np.sum(arr ** 2, axis=0))
return np.einsum('mi,mj->ij', arr, arr) / norms[:, None] / norms[None, :]
Run Code Online (Sandbox Code Playgroud)
m由于显式循环,第一个只要很小就会很快。第二个通常会很快,但两次计算矩阵的某些条目似乎效率低下。为了克服这两个问题,可以用 Numba 重写最后的循环:
import numba as nb
@nb.jit
def _biweight_midcorrelation_triu_nb(n, m, est, norms, result):
for i in range(m):
for j in range(i + 1, m):
x = 0
for k in range(n):
x += est[k, i] * est[k, j]
result[i, j] = result[j, i] = x / norms[i] / norms[j]
def biweight_midcorrelation_nb(arr):
n, m = arr.shape
arr = arr - np.median(arr, axis=0, keepdims=True)
v = 1 - (arr / (9 * np.median(np.abs(arr), axis=0, keepdims=True))) ** 2
arr = arr * v ** 2 * (v > 0)
norms = np.sqrt(np.sum(arr ** 2, axis=0))
result = np.empty((m, m))
np.fill_diagonal(result, 1.0)
_biweight_midcorrelation_triu_nb(n, m, arr, norms, result)
return result
Run Code Online (Sandbox Code Playgroud)
您现在提出的方法的稍微修改版本属于此类别:
def pairwise_biweight_midcorrelation_OP(a, b):
a_median = np.median(a)
b_median = np.median(b)
a_mad = np.median(np.abs(a - a_median))
b_mad = np.median(np.abs(b - b_median))
u_a = (a - a_median) / (9 * a_mad)
u_b = (b - b_median) / (9 * b_mad)
adj_a = (1 - u_a ** 2) ** 2 * ((1 - np.abs(u_a)) > 0)
adj_b = (1 - u_b ** 2) ** 2 * ((1 - np.abs(u_b)) > 0)
a = (a - a_median) * adj_a
b = (b - b_median) * adj_b
return np.sum(a * b) / (np.sqrt(np.sum(a ** 2)) * np.sqrt(np.sum(b ** 2)))
Run Code Online (Sandbox Code Playgroud)
这可能会写得更简洁一些,使用与上述类似的简化,导致:
def pairwise_biweight_midcorrelation_opt(a, b):
a = a - np.median(a)
b = b - np.median(b)
v_a = 1 - (a / (9 * np.median(np.abs(a)))) ** 2
v_b = 1 - (b / (9 * np.median(np.abs(b)))) ** 2
a = a * v_a ** 2 * (v_a > 0)
b = b * v_b ** 2 * (v_b > 0)
return np.sum(a * b) / (np.sqrt(np.sum(a ** 2)) * np.sqrt(np.sum(b ** 2)))
Run Code Online (Sandbox Code Playgroud)
最后的操作是在执行求和a,并b三次,但它实际上在一个循环,这可能与Numba再次做出快速完成:
@nb.jit
def pairwise_biweight_midcorrelation_nb(a, b):
n = a.size
a = a - np.median(a)
b = b - np.median(b)
v_a = 1 - (a / (9 * np.median(np.abs(a)))) ** 2
v_b = 1 - (b / (9 * np.median(np.abs(b)))) ** 2
a = (v_a > 0) * a * v_a ** 2
b = (v_b > 0) * b * v_b ** 2
s_ab = s_aa = s_bb = 0
for i in range(n):
s_ab += a[i] * b[i]
s_aa += a[i] * a[i]
s_bb += b[i] * b[i]
return s_ab / np.sqrt(s_aa) / np.sqrt(s_bb)
Run Code Online (Sandbox Code Playgroud)
但是没有简单的方法可以避免执行预计算2 * comb(n, 2)时间而不是n时间。故事的另一面是这类方法需要较少的内存,因为每次迭代只考虑两个一维数组。
对于建议的输入:
import pandas as pd
df = pd.DataFrame({'sepal_length': {'iris_0': 5.1, 'iris_1': 4.9, 'iris_2': 4.7, 'iris_3': 4.6, 'iris_4': 5.0, 'iris_5': 5.4, 'iris_6': 4.6, 'iris_7': 5.0, 'iris_8': 4.4, 'iris_9': 4.9, 'iris_10': 5.4, 'iris_11': 4.8, 'iris_12': 4.8, 'iris_13': 4.3, 'iris_14': 5.8, 'iris_15': 5.7, 'iris_16': 5.4, 'iris_17': 5.1, 'iris_18': 5.7, 'iris_19': 5.1, 'iris_20': 5.4, 'iris_21': 5.1, 'iris_22': 4.6, 'iris_23': 5.1, 'iris_24': 4.8, 'iris_25': 5.0, 'iris_26': 5.0, 'iris_27': 5.2, 'iris_28': 5.2, 'iris_29': 4.7, 'iris_30': 4.8, 'iris_31': 5.4, 'iris_32': 5.2, 'iris_33': 5.5, 'iris_34': 4.9, 'iris_35': 5.0, 'iris_36': 5.5, 'iris_37': 4.9, 'iris_38': 4.4, 'iris_39': 5.1, 'iris_40': 5.0, 'iris_41': 4.5, 'iris_42': 4.4, 'iris_43': 5.0, 'iris_44': 5.1, 'iris_45': 4.8, 'iris_46': 5.1, 'iris_47': 4.6, 'iris_48': 5.3, 'iris_49': 5.0, 'iris_50': 7.0, 'iris_51': 6.4, 'iris_52': 6.9, 'iris_53': 5.5, 'iris_54': 6.5, 'iris_55': 5.7, 'iris_56': 6.3, 'iris_57': 4.9, 'iris_58': 6.6, 'iris_59': 5.2, 'iris_60': 5.0, 'iris_61': 5.9, 'iris_62': 6.0, 'iris_63': 6.1, 'iris_64': 5.6, 'iris_65': 6.7, 'iris_66': 5.6, 'iris_67': 5.8, 'iris_68': 6.2, 'iris_69': 5.6, 'iris_70': 5.9, 'iris_71': 6.1, 'iris_72': 6.3, 'iris_73': 6.1, 'iris_74': 6.4, 'iris_75': 6.6, 'iris_76': 6.8, 'iris_77': 6.7, 'iris_78': 6.0, 'iris_79': 5.7, 'iris_80': 5.5, 'iris_81': 5.5, 'iris_82': 5.8, 'iris_83': 6.0, 'iris_84': 5.4, 'iris_85': 6.0, 'iris_86': 6.7, 'iris_87': 6.3, 'iris_88': 5.6, 'iris_89': 5.5, 'iris_90': 5.5, 'iris_91': 6.1, 'iris_92': 5.8, 'iris_93': 5.0, 'iris_94': 5.6, 'iris_95': 5.7, 'iris_96': 5.7, 'iris_97': 6.2, 'iris_98': 5.1, 'iris_99': 5.7, 'iris_100': 6.3, 'iris_101': 5.8, 'iris_102': 7.1, 'iris_103': 6.3, 'iris_104': 6.5, 'iris_105': 7.6, 'iris_106': 4.9, 'iris_107': 7.3, 'iris_108': 6.7, 'iris_109': 7.2, 'iris_110': 6.5, 'iris_111': 6.4, 'iris_112': 6.8, 'iris_113': 5.7, 'iris_114': 5.8, 'iris_115': 6.4, 'iris_116': 6.5, 'iris_117': 7.7, 'iris_118': 7.7, 'iris_119': 6.0, 'iris_120': 6.9, 'iris_121': 5.6, 'iris_122': 7.7, 'iris_123': 6.3, 'iris_124': 6.7, 'iris_125': 7.2, 'iris_126': 6.2, 'iris_127': 6.1, 'iris_128': 6.4, 'iris_129': 7.2, 'iris_130': 7.4, 'iris_131': 7.9, 'iris_132': 6.4, 'iris_133': 6.3, 'iris_134': 6.1, 'iris_135': 7.7, 'iris_136': 6.3, 'iris_137': 6.4, 'iris_138': 6.0, 'iris_139': 6.9, 'iris_140': 6.7, 'iris_141': 6.9, 'iris_142': 5.8, 'iris_143': 6.8, 'iris_144': 6.7, 'iris_145': 6.7, 'iris_146': 6.3, 'iris_147': 6.5, 'iris_148': 6.2, 'iris_149': 5.9}, 'sepal_width': {'iris_0': 3.5, 'iris_1': 3.0, 'iris_2': 3.2, 'iris_3': 3.1, 'iris_4': 3.6, 'iris_5': 3.9, 'iris_6': 3.4, 'iris_7': 3.4, 'iris_8': 2.9, 'iris_9': 3.1, 'iris_10': 3.7, 'iris_11': 3.4, 'iris_12': 3.0, 'iris_13': 3.0, 'iris_14': 4.0, 'iris_15': 4.4, 'iris_16': 3.9, 'iris_17': 3.5, 'iris_18': 3.8, 'iris_19': 3.8, 'iris_20': 3.4, 'iris_21': 3.7, 'iris_22': 3.6, 'iris_23': 3.3, 'iris_24': 3.4, 'iris_25': 3.0, 'iris_26': 3.4, 'iris_27': 3.5, 'iris_28': 3.4, 'iris_29': 3.2, 'iris_30': 3.1, 'iris_31': 3.4, 'iris_32': 4.1, 'iris_33': 4.2, 'iris_34': 3.1, 'iris_35': 3.2, 'iris_36': 3.5, 'iris_37': 3.6, 'iris_38': 3.0, 'iris_39': 3.4, 'iris_40': 3.5, 'iris_41': 2.3, 'iris_42': 3.2, 'iris_43': 3.5, 'iris_44': 3.8, 'iris_45': 3.0, 'iris_46': 3.8, 'iris_47': 3.2, 'iris_48': 3.7, 'iris_49': 3.3, 'iris_50': 3.2, 'iris_51': 3.2, 'iris_52': 3.1, 'iris_53': 2.3, 'iris_54': 2.8, 'iris_55': 2.8, 'iris_56': 3.3, 'iris_57': 2.4, 'iris_58': 2.9, 'iris_59': 2.7, 'iris_60': 2.0, 'iris_61': 3.0, 'iris_62': 2.2, 'iris_63': 2.9, 'iris_64': 2.9, 'iris_65': 3.1, 'iris_66': 3.0, 'iris_67': 2.7, 'iris_68': 2.2, 'iris_69': 2.5, 'iris_70': 3.2, 'iris_71': 2.8, 'iris_72': 2.5, 'iris_73': 2.8, 'iris_74': 2.9, 'iris_75': 3.0, 'iris_76': 2.8, 'iris_77': 3.0, 'iris_78': 2.9, 'iris_79': 2.6, 'iris_80': 2.4, 'iris_81': 2.4, 'iris_82': 2.7, 'iris_83': 2.7, 'iris_84': 3.0, 'iris_85': 3.4, 'iris_86': 3.1, 'iris_87': 2.3, 'iris_88': 3.0, 'iris_89': 2.5, 'iris_90': 2.6, 'iris_91': 3.0, 'iris_92': 2.6, 'iris_93': 2.3, 'iris_94': 2.7, 'iris_95': 3.0, 'iris_96': 2.9, 'iris_97': 2.9, 'iris_98': 2.5, 'iris_99': 2.8, 'iris_100': 3.3, 'iris_101': 2.7, 'iris_102': 3.0, 'iris_103': 2.9, 'iris_104': 3.0, 'iris_105': 3.0, 'iris_106': 2.5, 'iris_107': 2.9, 'iris_108': 2.5, 'iris_109': 3.6, 'iris_110': 3.2, 'iris_111': 2.7, 'iris_112': 3.0, 'iris_113': 2.5, 'iris_114': 2.8, 'iris_115': 3.2, 'iris_116': 3.0, 'iris_117': 3.8, 'iris_118': 2.6, 'iris_119': 2.2, 'iris_120': 3.2, 'iris_121': 2.8, 'iris_122': 2.8, 'iris_123': 2.7, 'iris_124': 3.3, 'iris_125': 3.2, 'iris_126': 2.8, 'iris_127': 3.0, 'iris_128': 2.8, 'iris_129': 3.0, 'iris_130': 2.8, 'iris_131': 3.8, 'iris_132': 2.8, 'iris_133': 2.8, 'iris_134': 2.6, 'iris_135': 3.0, 'iris_136': 3.4, 'iris_137': 3.1, 'iris_138': 3.0, 'iris_139': 3.1, 'iris_140': 3.1, 'iris_141': 3.1, 'iris_142': 2.7, 'iris_143': 3.2, 'iris_144': 3.3, 'iris_145': 3.0, 'iris_146': 2.5, 'iris_147': 3.0, 'iris_148': 3.4, 'iris_149': 3.0}, 'petal_length': {'iris_0': 1.4, 'iris_1': 1.4, 'iris_2': 1.3, 'iris_3': 1.5, 'iris_4': 1.4, 'iris_5': 1.7, 'iris_6': 1.4, 'iris_7': 1.5, 'iris_8': 1.4, 'iris_9': 1.5, 'iris_10': 1.5, 'iris_11': 1.6, 'iris_12': 1.4, 'iris_13': 1.1, 'iris_14': 1.2, 'iris_15': 1.5, 'iris_16': 1.3, 'iris_17': 1.4, 'iris_18': 1.7, 'iris_19': 1.5, 'iris_20': 1.7, 'iris_21': 1.5, 'iris_22': 1.0, 'iris_23': 1.7, 'iris_24': 1.9, 'iris_25': 1.6, 'iris_26': 1.6, 'iris_27': 1.5, 'iris_28': 1.4, 'iris_29': 1.6, 'iris_30': 1.6, 'iris_31': 1.5, 'iris_32': 1.5, 'iris_33': 1.4, 'iris_34': 1.5, 'iris_35': 1.2, 'iris_36': 1.3, 'iris_37': 1.4, 'iris_38': 1.3, 'iris_39': 1.5, 'iris_40': 1.3, 'iris_41': 1.3, 'iris_42': 1.3, 'iris_43': 1.6, 'iris_44': 1.9, 'iris_45': 1.4, 'iris_46': 1.6, 'iris_47': 1.4, 'iris_48': 1.5, 'iris_49': 1.4, 'iris_50': 4.7, 'iris_51': 4.5, 'iris_52': 4.9, 'iris_53': 4.0, 'iris_54': 4.6, 'iris_55': 4.5, 'iris_56': 4.7, 'iris_57': 3.3, 'iris_58': 4.6, 'iris_59': 3.9, 'iris_60': 3.5, 'iris_61': 4.2, 'iris_62': 4.0, 'iris_63': 4.7, 'iris_64': 3.6, 'iris_65': 4.4, 'iris_66': 4.5, 'iris_67': 4.1, 'iris_68': 4.5, 'iris_69': 3.9, 'iris_70': 4.8, 'iris_71': 4.0, 'iris_72': 4.9, 'iris_73': 4.7, 'iris_74': 4.3, 'iris_75': 4.4, 'iris_76': 4.8, 'iris_77': 5.0, 'iris_78': 4.5, 'iris_79': 3.5, 'iris_80': 3.8, 'iris_81': 3.7, 'iris_82': 3.9, 'iris_83': 5.1, 'iris_84': 4.5, 'iris_85': 4.5, 'iris_86': 4.7, 'iris_87': 4.4, 'iris_88': 4.1, 'iris_89': 4.0, 'iris_90': 4.4, 'iris_91': 4.6, 'iris_92': 4.0, 'iris_93': 3.3, 'iris_94': 4.2, 'iris_95': 4.2, 'iris_96': 4.2, 'iris_97': 4.3, 'iris_98': 3.0, 'iris_99': 4.1, 'iris_100': 6.0, 'iris_101': 5.1, 'iris_102': 5.9, 'iris_103': 5.6, 'iris_104': 5.8, 'iris_105': 6.6, 'iris_106': 4.5, 'iris_107': 6.3, 'iris_108': 5.8, 'iris_109': 6.1, 'iris_110': 5.1, 'iris_111': 5.3, 'iris_112': 5.5, 'iris_113': 5.0, 'iris_114': 5.1, 'iris_115': 5.3, 'iris_116': 5.5, 'iris_117': 6.7, 'iris_118': 6.9, 'iris_119': 5.0, 'iris_120': 5.7, 'iris_121': 4.9, 'iris_122': 6.7, 'iris_123': 4.9, 'iris_124': 5.7, 'iris_125': 6.0, 'iris_126': 4.8, 'iris_127': 4.9, 'iris_128': 5.6, 'iris_129': 5.8, 'iris_130': 6.1, 'iris_131': 6.4, 'iris_132': 5.6, 'iris_133': 5.1, 'iris_134': 5.6, 'iris_135': 6.1, 'iris_136': 5.6, 'iris_137': 5.5, 'iris_138': 4.8, 'iris_139': 5.4, 'iris_140': 5.6, 'iris_141': 5.1, 'iris_142': 5.1, 'iris_143': 5.9, 'iris_144': 5.7, 'iris_145': 5.2, 'iris_146': 5.0, 'iris_147': 5.2, 'iris_148': 5.4, 'iris_149': 5.1}, 'petal_width': {'iris_0': 0.2, 'iris_1': 0.2, 'iris_2': 0.2, 'iris_3': 0.2, 'iris_4': 0.2, 'iris_5': 0.4, 'iris_6': 0.3, 'iris_7': 0.2, 'iris_8': 0.2, 'iris_9': 0.1, 'iris_10': 0.2, 'iris_11': 0.2, 'iris_12': 0.1, 'iris_13': 0.1, 'iris_14': 0.2, 'iris_15': 0.4, 'iris_16': 0.4, 'iris_17': 0.3, 'iris_18': 0.3, 'iris_19': 0.3, 'iris_20': 0.2, 'iris_21': 0.4, 'iris_22': 0.2, 'iris_23': 0.5, 'iris_24': 0.2, 'iris_25': 0.2, 'iris_26': 0.4, 'iris_27': 0.2, 'iris_28': 0.2, 'iris_29': 0.2, 'iris_30': 0.2, 'iris_31': 0.4, 'iris_32': 0.1, 'iris_33': 0.2, 'iris_34': 0.2, 'iris_35': 0.2, 'iris_36': 0.2, 'iris_37': 0.1, 'iris_38': 0.2, 'iris_39': 0.2, 'iris_40': 0.3, 'iris_41': 0.3, 'iris_42': 0.2, 'iris_43': 0.6, 'iris_44': 0.4, 'iris_45': 0.3, 'iris_46': 0.2, 'iris_47': 0.2, 'iris_48': 0.2, 'iris_49': 0.2, 'iris_50': 1.4, 'iris_51': 1.5, 'iris_52': 1.5, 'iris_53': 1.3, 'iris_54': 1.5, 'iris_55': 1.3, 'iris_56': 1.6, 'iris_57': 1.0, 'iris_58': 1.3, 'iris_59': 1.4, 'iris_60': 1.0, 'iris_61': 1.5, 'iris_62': 1.0, 'iris_63': 1.4, 'iris_64': 1.3, 'iris_65': 1.4, 'iris_66': 1.5, 'iris_67': 1.0, 'iris_68': 1.5, 'iris_69': 1.1, 'iris_70': 1.8, 'iris_71': 1.3, 'iris_72': 1.5, 'iris_73': 1.2, 'iris_74': 1.3, 'iris_75': 1.4, 'iris_76': 1.4, 'iris_77': 1.7, 'iris_78': 1.5, 'iris_79': 1.0, 'iris_80': 1.1, 'iris_81': 1.0, 'iris_82': 1.2, 'iris_83': 1.6, 'iris_84': 1.5, 'iris_85': 1.6, 'iris_86': 1.5, 'iris_87': 1.3, 'iris_88': 1.3, 'iris_89': 1.3, 'iris_90': 1.2, 'iris_91': 1.4, 'iris_92': 1.2, 'iris_93': 1.0, 'iris_94': 1.3, 'iris_95': 1.2, 'iris_96': 1.3, 'iris_97': 1.3, 'iris_98': 1.1, 'iris_99': 1.3, 'iris_100': 2.5, 'iris_101': 1.9, 'iris_102': 2.1, 'iris_103': 1.8, 'iris_104': 2.2, 'iris_105': 2.1, 'iris_106': 1.7, 'iris_107': 1.8, 'iris_108': 1.8, 'iris_109': 2.5, 'iris_110': 2.0, 'iris_111': 1.9, 'iris_112': 2.1, 'iris_113': 2.0, 'iris_114': 2.4, 'iris_115': 2.3, 'iris_116': 1.8, 'iris_117': 2.2, 'iris_118': 2.3, 'iris_119': 1.5, 'iris_120': 2.3, 'iris_121': 2.0, 'iris_122': 2.0, 'iris_123': 1.8, 'iris_124': 2.1, 'iris_125': 1.8, 'iris_126': 1.8, 'iris_127': 1.8, 'iris_128': 2.1, 'iris_129': 1.6, 'iris_130': 1.9, 'iris_131': 2.0, 'iris_132': 2.2, 'iris_133': 1.5, 'iris_134': 1.4, 'iris_135': 2.3, 'iris_136': 2.4, 'iris_137': 1.8, 'iris_138': 1.8, 'iris_139': 2.1, 'iris_140': 2.4, 'iris_141': 2.3, 'iris_142': 1.9, 'iris_143': 2.3, 'iris_144': 2.5, 'iris_145': 2.3, 'iris_146': 1.9, 'iris_147': 2.0, 'iris_148': 2.3, 'iris_149': 1.8}})
Run Code Online (Sandbox Code Playgroud)
我们获得:
print(np.all(np.isclose(biweight_midcorrelation_pd_OP(df), result)))
# True
print(np.all(np.isclose(corr_np2pd(df, biweight_midcorrelation_OP), result)))
# True
print(np.all(np.isclose(corr_np2pd(df, biweight_midcorrelation_np), result)))
# True
print(np.all(np.isclose(corr_np2pd(df, biweight_midcorrelation_npv), result)))
# True
print(np.all(np.isclose(corr_np2pd(df, biweight_midcorrelation_nb), result)))
# True
print(np.all(np.isclose(df.corr(method=pairwise_biweight_midcorrelation_OP), result)))
# True
print(np.all(np.isclose(df.corr(method=pairwise_biweight_midcorrelation_opt), result)))
# True
print(np.all(np.isclose(df.corr(method=pairwise_biweight_midcorrelation_nb), result)))
# True
Run Code Online (Sandbox Code Playgroud)
%timeit biweight_midcorrelation_pd_OP(df)
# 10 loops, best of 3: 22.1 ms per loop
%timeit corr_np2pd(df, biweight_midcorrelation_OP)
# 1000 loops, best of 3: 682 µs per loop
%timeit corr_np2pd(df, biweight_midcorrelation_np)
# 1000 loops, best of 3: 422 µs per loop
%timeit corr_np2pd(df, biweight_midcorrelation_npv)
# 1000 loops, best of 3: 341 µs per loop
%timeit corr_np2pd(df, biweight_midcorrelation_nb)
# 1000 loops, best of 3: 325 µs per loop
%timeit df.corr(method=pairwise_biweight_midcorrelation_OP)
# 100 loops, best of 3: 1.96 ms per loop
%timeit df.corr(method=pairwise_biweight_midcorrelation_opt)
# 100 loops, best of 3: 1.83 ms per loop
%timeit df.corr(method=pairwise_biweight_midcorrelation_nb)
# 1000 loops, best of 3: 506 µs per loop
Run Code Online (Sandbox Code Playgroud)
这些结果表明基于 Numba 的方法是最快的,紧随其后的是原始方法的 NumPy 向量化版本。
请注意,从基于 Pandas 的计算到纯基于 NumPy 的方法(即使使用显式循环),我们获得了近 30 倍的速度因子。对两个for循环进行矢量化为我们提供了另一个大约。2 倍系数。
该pd.DataFrame.corr()基础的方法(ES)是,不使用时Numba,约。比用 NumPy 重写的原始方法慢 4 倍,因此即使您没有看到显式循环也要小心!Numba 加速pairwise_biweight_midcorrelation_nb()极大地推动了这一系列方法,但它不可能避免预计算的开销。
最后警告:所有这些基准测试都应该谨慎对待!
(编辑以包含基于 Numba 的方法与 一起使用pd.DataFrame.corr())。
| 归档时间: |
|
| 查看次数: |
266 次 |
| 最近记录: |