tmt*_*prt 33 python statistics distribution matplotlib scipy
scipy.stats分布直方图可制成的scipy.stats正常随机变量看到分布的样子.
% matplotlib inline
import pandas as pd
import scipy.stats as stats
d = stats.norm()
rv = d.rvs(100000)
pd.Series(rv).hist(bins=32, normed=True)
Run Code Online (Sandbox Code Playgroud)
其他发行版是什么样的?
tmt*_*prt 84
scipy.stats发行版基于分布列表scipy.stats,下面绘制的是每个连续随机变量的直方图和PDF.用于生成每个分布的代码位于底部.注意:形状常量取自scipy.stats分发文档页面上的示例.
alpha(a=3.57, loc=0.00, scale=1.00)
anglit(loc=0.00, scale=1.00)
arcsine(loc=0.00, scale=1.00)
beta(a=2.31, loc=0.00, scale=1.00, b=0.63)
betaprime(a=5.00, loc=0.00, scale=1.00, b=6.00)
bradford(loc=0.00, c=0.30, scale=1.00)
burr(loc=0.00, c=10.50, scale=1.00, d=4.30)
cauchy(loc=0.00, scale=1.00)
chi(df=78.00, loc=0.00, scale=1.00)
chi2(df=55.00, loc=0.00, scale=1.00)
cosine(loc=0.00, scale=1.00)
dgamma(a=1.10, loc=0.00, scale=1.00)
dweibull(loc=0.00, c=2.07, scale=1.00)
erlang(a=2.00, loc=0.00, scale=1.00)
expon(loc=0.00, scale=1.00)
exponnorm(loc=0.00, K=1.50, scale=1.00)
exponpow(loc=0.00, scale=1.00, b=2.70)
exponweib(a=2.89, loc=0.00, c=1.95, scale=1.00)
f(loc=0.00, dfn=29.00, scale=1.00, dfd=18.00)
fatiguelife(loc=0.00, c=29.00, scale=1.00)
fisk(loc=0.00, c=3.09, scale=1.00)
foldcauchy(loc=0.00, c=4.72, scale=1.00)
foldnorm(loc=0.00, c=1.95, scale=1.00)
frechet_l(loc=0.00, c=3.63, scale=1.00)
frechet_r(loc=0.00, c=1.89, scale=1.00)
gamma(a=1.99, loc=0.00, scale=1.00)
gausshyper(a=13.80, loc=0.00, c=2.51, scale=1.00, b=3.12, z=5.18)
genexpon(a=9.13, loc=0.00, c=3.28, scale=1.00, b=16.20)
genextreme(loc=0.00, c=-0.10, scale=1.00)
gengamma(a=4.42, loc=0.00, c=-3.12, scale=1.00)
genhalflogistic(loc=0.00, c=0.77, scale=1.00)
genlogistic(loc=0.00, c=0.41, scale=1.00)
gennorm(loc=0.00, beta=1.30, scale=1.00)
genpareto(loc=0.00, c=0.10, scale=1.00)
gilbrat(loc=0.00, scale=1.00)
gompertz(loc=0.00, c=0.95, scale=1.00)
gumbel_l(loc=0.00, scale=1.00)
gumbel_r(loc=0.00, scale=1.00)
halfcauchy(loc=0.00, scale=1.00)
halfgennorm(loc=0.00, beta=0.68, scale=1.00)
halflogistic(loc=0.00, scale=1.00)
halfnorm(loc=0.00, scale=1.00)
hypsecant(loc=0.00, scale=1.00)
invgamma(a=4.07, loc=0.00, scale=1.00)
invgauss(mu=0.14, loc=0.00, scale=1.00)
invweibull(loc=0.00, c=10.60, scale=1.00)
johnsonsb(a=4.32, loc=0.00, scale=1.00, b=3.18)
johnsonsu(a=2.55, loc=0.00, scale=1.00, b=2.25)
ksone(loc=0.00, scale=1.00, n=1000.00)
kstwobign(loc=0.00, scale=1.00)
laplace(loc=0.00, scale=1.00)
levy(loc=0.00, scale=1.00)
levy_l(loc=0.00, scale=1.00)
loggamma(loc=0.00, c=0.41, scale=1.00)
logistic(loc=0.00, scale=1.00)
loglaplace(loc=0.00, c=3.25, scale=1.00)
lognorm(loc=0.00, s=0.95, scale=1.00)
lomax(loc=0.00, c=1.88, scale=1.00)
maxwell(loc=0.00, scale=1.00)
mielke(loc=0.00, s=3.60, scale=1.00, k=10.40)
nakagami(loc=0.00, scale=1.00, nu=4.97)
ncf(loc=0.00, dfn=27.00, nc=0.42, dfd=27.00, scale=1.00)
nct(df=14.00, loc=0.00, scale=1.00, nc=0.24)
ncx2(df=21.00, loc=0.00, scale=1.00, nc=1.06)
norm(loc=0.00, scale=1.00)
pareto(loc=0.00, scale=1.00, b=2.62)
pearson3(loc=0.00, skew=0.10, scale=1.00)
powerlaw(a=1.66, loc=0.00, scale=1.00)
powerlognorm(loc=0.00, s=0.45, scale=1.00, c=2.14)
powernorm(loc=0.00, c=4.45, scale=1.00)
rayleigh(loc=0.00, scale=1.00)
Pat*_*ald 12
_distr_params这是一个解决方案,它在单个图中显示所有 scipy 概率分布,并通过从包含所有可用分布的合理参数的文件中提取分布形状参数来避免复制粘贴(或网络抓取)分布形状参数。
与接受的答案类似,为每个分布生成随机变量样本。然后,这些样本存储在 pandas 数据框中,其中包含相同分布名称的列被重命名(基于MaxU 的答案),因为某些分布使用不同的参数定义多次列出(例如 kappa4)。这样,可以使用df.hist创建直方图网格的便捷函数来绘制样本。然后用代表概率密度函数的线图覆盖这些图,范围从 0.1% 分位数到 99.9% 分位数。
还有几点需要指出:
matplotlib.pyplot,因为 matplotlib 对象是使用 pandas 绘图函数生成的(除非您需要plt.show)。用于生成 x 标签和随机变量的代码基于 tmthydvnprt 接受的答案和Adam Erickson 的答案以及scipy 文档。
import numpy as np # v 1.19.2
from scipy import stats # v 1.5.2
import pandas as pd # v 1.1.3
pd.options.display.max_columns = 6
np.random.seed(123)
Run Code Online (Sandbox Code Playgroud)
size = 10000
names, xlabels, frozen_rvs, samples = [], [], [], []
# Extract names and sane parameters of all scipy probability distributions
# (except the deprecated ones) and loop through them to create lists of names,
# frozen random variables, samples of random variates and x labels
for name, params in stats._distr_params.distcont:
if name not in ['frechet_l', 'frechet_r']:
loc, scale = 0, 1
names.append(name)
params = list(params) + [loc, scale]
# Create instance of random variable
dist = getattr(stats, name)
# Create frozen random variable using parameters and add it to the list
# to be used to draw the probability density functions
rv = dist(*params)
frozen_rvs.append(rv)
# Create sample of random variates
samples.append(rv.rvs(size=size))
# Create x label containing the distribution parameters
p_names = ['loc', 'scale']
if dist.shapes:
p_names = [sh.strip() for sh in dist.shapes.split(',')] + ['loc', 'scale']
xlabels.append(', '.join([f'{pn}={pv:.2f}' for pn, pv in zip(p_names, params)]))
# Create pandas dataframe containing all the samples
df = pd.DataFrame(data=np.array(samples).T, columns=[name for name in names])
# Rename the duplicate column names by adding a period and an integer at the end
df.columns = pd.io.parsers.ParserBase({'names':df.columns})._maybe_dedup_names(df.columns)
Run Code Online (Sandbox Code Playgroud)
df.head()
# alpha anglit arcsine ... weibull_max weibull_min wrapcauchy
# 0 0.327165 0.166185 0.018339 ... -0.928914 0.359808 4.454122
# 1 0.241819 0.373590 0.630670 ... -0.733157 0.479574 2.778336
# 2 0.231489 0.352024 0.457251 ... -0.580317 1.312468 4.932825
# 3 0.290551 -0.133986 0.797215 ... -0.954856 0.341515 3.874536
# 4 0.334494 -0.353015 0.439837 ... -1.440794 0.498514 5.195171
Run Code Online (Sandbox Code Playgroud)
# Set parameters for figure dimensions
nplot = df.columns.size
cols = 3
rows = int(np.ceil(nplot/cols))
subp_w = 10/cols # 10 corresponds to the figure width in inches
subp_h = 0.9*subp_w
# Create pandas grid of histograms
axs = df.hist(density=True, bins=15, grid=False, edgecolor='w',
linewidth=0.5, legend=False,
layout=(rows, cols), figsize=(cols*subp_w, rows*subp_h))
# Loop over subplots to draw probability density function and apply some
# additional formatting
for idx, ax in enumerate(axs.flat[:df.columns.size]):
rv = frozen_rvs[idx]
x = np.linspace(rv.ppf(0.001), rv.ppf(0.999), size)
ax.plot(x, rv.pdf(x), c='black', alpha=0.5)
ax.set_title(ax.get_title(), pad=25)
ax.set_xlim(x.min(), x.max())
ax.set_xlabel(xlabels[idx], fontsize=8, labelpad=10)
ax.xaxis.set_label_position('top')
ax.tick_params(axis='both', labelsize=9)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.figure.subplots_adjust(hspace=0.8, wspace=0.3)
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
10940 次 |
| 最近记录: |