scipy.stats中的所有发行版都是什么样的?

tmt*_*prt 33 python statistics distribution matplotlib scipy

可视化scipy.stats分布

直方图可制成scipy.stats正常随机变量看到分布的样子.

% matplotlib inline
import pandas as pd
import scipy.stats as stats
d = stats.norm()
rv = d.rvs(100000)
pd.Series(rv).hist(bins=32, normed=True)
Run Code Online (Sandbox Code Playgroud)

正态分布

其他发行版是什么样的?

tmt*_*prt 84

可视化所有scipy.stats发行版

基于分布列表scipy.stats,下面绘制的是每个连续随机变量直方图PDF.用于生成每个分布的代码位于底部.注意:形状常量取自scipy.stats分发文档页面上的示例.

alpha(a=3.57, loc=0.00, scale=1.00)

alpha(a = 3.57,loc = 0.00,scale = 1.00)

anglit(loc=0.00, scale=1.00)

anglit(loc = 0.00,scale = 1.00)

arcsine(loc=0.00, scale=1.00)

arcsine(loc = 0.00,scale = 1.00)

beta(a=2.31, loc=0.00, scale=1.00, b=0.63)

beta(a = 2.31,loc = 0.00,scale = 1.00,b = 0.63)

betaprime(a=5.00, loc=0.00, scale=1.00, b=6.00)

betaprime(a = 5.00,loc = 0.00,scale = 1.00,b = 6.00)

bradford(loc=0.00, c=0.30, scale=1.00)

布拉德福德(loc = 0.00,c = 0.30,scale = 1.00)

burr(loc=0.00, c=10.50, scale=1.00, d=4.30)

毛刺(loc = 0.00,c = 10.50,比例= 1.00,d = 4.30)

cauchy(loc=0.00, scale=1.00)

cauchy(loc = 0.00,scale = 1.00)

chi(df=78.00, loc=0.00, scale=1.00)

chi(df = 78.00,loc = 0.00,scale = 1.00)

chi2(df=55.00, loc=0.00, scale=1.00)

chi2(df = 55.00,loc = 0.00,scale = 1.00)

cosine(loc=0.00, scale=1.00)

余弦(loc = 0.00,scale = 1.00)

dgamma(a=1.10, loc=0.00, scale=1.00)

dgamma(a = 1.10,loc = 0.00,scale = 1.00)

dweibull(loc=0.00, c=2.07, scale=1.00)

dweibull(loc = 0.00,c = 2.07,scale = 1.00)

erlang(a=2.00, loc=0.00, scale=1.00)

erlang(a = 2.00,loc = 0.00,scale = 1.00)

expon(loc=0.00, scale=1.00)

expon(loc = 0.00,scale = 1.00)

exponnorm(loc=0.00, K=1.50, scale=1.00)

exponnorm(loc = 0.00,K = 1.50,scale = 1.00)

exponpow(loc=0.00, scale=1.00, b=2.70)

exponpow(loc = 0.00,scale = 1.00,b = 2.70)

exponweib(a=2.89, loc=0.00, c=1.95, scale=1.00)

exponweib(a = 2.89,loc = 0.00,c = 1.95,scale = 1.00)

f(loc=0.00, dfn=29.00, scale=1.00, dfd=18.00)

f(loc = 0.00,dfn = 29.00,scale = 1.00,dfd = 18.00)

fatiguelife(loc=0.00, c=29.00, scale=1.00)

fatiguelife(loc = 0.00,c = 29.00,scale = 1.00)

fisk(loc=0.00, c=3.09, scale=1.00)

fisk(loc = 0.00,c = 3.09,scale = 1.00)

foldcauchy(loc=0.00, c=4.72, scale=1.00)

foldcauchy(loc = 0.00,c = 4.72,scale = 1.00)

foldnorm(loc=0.00, c=1.95, scale=1.00)

foldnorm(loc = 0.00,c = 1.95,scale = 1.00)

frechet_l(loc=0.00, c=3.63, scale=1.00)

frechet_l(loc = 0.00,c = 3.63,scale = 1.00)

frechet_r(loc=0.00, c=1.89, scale=1.00)

frechet_r(loc = 0.00,c = 1.89,scale = 1.00)

gamma(a=1.99, loc=0.00, scale=1.00)

gamma(a = 1.99,loc = 0.00,scale = 1.00)

gausshyper(a=13.80, loc=0.00, c=2.51, scale=1.00, b=3.12, z=5.18)

gausshyper(a = 13.80,loc = 0.00,c = 2.51,scale = 1.00,b = 3.12,z = 5.18)

genexpon(a=9.13, loc=0.00, c=3.28, scale=1.00, b=16.20)

genexpon(a = 9.13,loc = 0.00,c = 3.28,scale = 1.00,b = 16.20)

genextreme(loc=0.00, c=-0.10, scale=1.00)

genextreme(loc=0.00, c=-0.10, scale=1.00)

gengamma(a=4.42, loc=0.00, c=-3.12, scale=1.00)

gengamma(a=4.42, loc=0.00, c=-3.12, scale=1.00)

genhalflogistic(loc=0.00, c=0.77, scale=1.00)

genhalflogistic(loc=0.00, c=0.77, scale=1.00)

genlogistic(loc=0.00, c=0.41, scale=1.00)

genlogistic(loc=0.00, c=0.41, scale=1.00)

gennorm(loc=0.00, beta=1.30, scale=1.00)

gennorm(loc=0.00, beta=1.30, scale=1.00)

genpareto(loc=0.00, c=0.10, scale=1.00)

genpareto(loc=0.00, c=0.10, scale=1.00)

gilbrat(loc=0.00, scale=1.00)

gilbrat(loc=0.00, scale=1.00)

gompertz(loc=0.00, c=0.95, scale=1.00)

gompertz(loc=0.00, c=0.95, scale=1.00)

gumbel_l(loc=0.00, scale=1.00)

gumbel_l(loc=0.00, scale=1.00)

gumbel_r(loc=0.00, scale=1.00)

gumbel_r(loc=0.00, scale=1.00)

halfcauchy(loc=0.00, scale=1.00)

halfcauchy(loc=0.00, scale=1.00)

halfgennorm(loc=0.00, beta=0.68, scale=1.00)

halfgennorm(loc=0.00, beta=0.68, scale=1.00)

halflogistic(loc=0.00, scale=1.00)

halflogistic(loc=0.00, scale=1.00)

halfnorm(loc=0.00, scale=1.00)

halfnorm(loc=0.00, scale=1.00)

hypsecant(loc=0.00, scale=1.00)

hypsecant(loc=0.00, scale=1.00)

invgamma(a=4.07, loc=0.00, scale=1.00)

invgamma(a=4.07, loc=0.00, scale=1.00)

invgauss(mu=0.14, loc=0.00, scale=1.00)

invgauss(mu=0.14, loc=0.00, scale=1.00)

invweibull(loc=0.00, c=10.60, scale=1.00)

invweibull(loc=0.00, c=10.60, scale=1.00)

johnsonsb(a=4.32, loc=0.00, scale=1.00, b=3.18)

johnsonsb(a=4.32, loc=0.00, scale=1.00, b=3.18)

johnsonsu(a=2.55, loc=0.00, scale=1.00, b=2.25)

johnsonsu(a=2.55, loc=0.00, scale=1.00, b=2.25)

ksone(loc=0.00, scale=1.00, n=1000.00)

ksone(loc=0.00, scale=1.00, n=1000.00)

kstwobign(loc=0.00, scale=1.00)

kstwobign(loc=0.00, scale=1.00)

laplace(loc=0.00, scale=1.00)

laplace(loc=0.00, scale=1.00)

levy(loc=0.00, scale=1.00)

levy(loc=0.00, scale=1.00)

levy_l(loc=0.00, scale=1.00)

levy_l(loc=0.00, scale=1.00)

loggamma(loc=0.00, c=0.41, scale=1.00)

loggamma(loc=0.00, c=0.41, scale=1.00)

logistic(loc=0.00, scale=1.00)

logistic(loc=0.00, scale=1.00)

loglaplace(loc=0.00, c=3.25, scale=1.00)

loglaplace(loc=0.00, c=3.25, scale=1.00)

lognorm(loc=0.00, s=0.95, scale=1.00)

lognorm(loc=0.00, s=0.95, scale=1.00)

lomax(loc=0.00, c=1.88, scale=1.00)

lomax(loc=0.00, c=1.88, scale=1.00)

maxwell(loc=0.00, scale=1.00)

maxwell(loc=0.00, scale=1.00)

mielke(loc=0.00, s=3.60, scale=1.00, k=10.40)

mielke(loc=0.00, s=3.60, scale=1.00, k=10.40)

nakagami(loc=0.00, scale=1.00, nu=4.97)

nakagami(loc=0.00, scale=1.00, nu=4.97)

ncf(loc=0.00, dfn=27.00, nc=0.42, dfd=27.00, scale=1.00)

ncf(loc=0.00, dfn=27.00, nc=0.42, dfd=27.00, scale=1.00)

nct(df=14.00, loc=0.00, scale=1.00, nc=0.24)

nct(df=14.00, loc=0.00, scale=1.00, nc=0.24)

ncx2(df=21.00, loc=0.00, scale=1.00, nc=1.06)

ncx2(df=21.00, loc=0.00, scale=1.00, nc=1.06)

norm(loc=0.00, scale=1.00)

norm(loc=0.00, scale=1.00)

pareto(loc=0.00, scale=1.00, b=2.62)

pareto(loc=0.00, scale=1.00, b=2.62)

pearson3(loc=0.00, skew=0.10, scale=1.00)

pearson3(loc=0.00, skew=0.10, scale=1.00)

powerlaw(a=1.66, loc=0.00, scale=1.00)

powerlaw(a=1.66, loc=0.00, scale=1.00)

powerlognorm(loc=0.00, s=0.45, scale=1.00, c=2.14)

powerlognorm(loc=0.00, s=0.45, scale=1.00, c=2.14)

powernorm(loc=0.00, c=4.45, scale=1.00)

powernorm(loc=0.00, c=4.45, scale=1.00)

rayleigh(loc=0.00, scale=1.00)

rayleigh(loc=0.00, scale=1.00)

  • 超越这个答案! (4认同)
  • 有人给这家伙一枚奖章! (3认同)
  • @TheLazyScripter谢谢,所有链接和图片只涉及你的很多名字,哈哈. (2认同)

  • Pat*_*ald 12

    在单个图中可视化所有 scipy 概率分布

    _distr_params这是一个解决方案,它在单个图中显示所有 scipy 概率分布,并通过从包含所有可用分布的合理参数的文件中提取分布形状参数来避免复制粘贴(或网络抓取)分布形状参数。

    与接受的答案类似,为每个分布生成随机变量样本。然后,这些样本存储在 pandas 数据框中,其中包含相同分布名称的列被重命名(基于MaxU 的答案),因为某些分布使用不同的参数定义多次列出(例如 kappa4)。这样,可以使用df.hist创建直方图网格的便捷函数来绘制样本。然后用代表概率密度函数的线图覆盖这些图,范围从 0.1% 分位数到 99.9% 分位数。

    还有几点需要指出:

    • 所有分布的位置和尺度参数均设置为默认值(0 和 1)。
    • 由于一个或多个异常值位于 0.1-99.9% 分位数限制之外,某些直方图仅显示几个非常宽的条形。
    • 在本例中,绘图宽度仅限于 10 英寸,以保持上传图像的清晰度。因此,您可能会注意到一些 x 标签(用作字幕)重叠。
    • 无需导入matplotlib.pyplot,因为 matplotlib 对象是使用 pandas 绘图函数生成的(除非您需要plt.show)。

    用于生成 x 标签和随机变量的代码基于 tmthydvnprt 接受的答案和Adam Erickson 的答案以及scipy 文档

    import numpy as np         # v 1.19.2
    from scipy import stats    # v 1.5.2
    import pandas as pd        # v 1.1.3
    
    pd.options.display.max_columns = 6
    np.random.seed(123)
    
    Run Code Online (Sandbox Code Playgroud)
    size = 10000
    names, xlabels, frozen_rvs, samples = [], [], [], []
    
    # Extract names and sane parameters of all scipy probability distributions
    # (except the deprecated ones) and loop through them to create lists of names,
    # frozen random variables, samples of random variates and x labels
    for name, params in stats._distr_params.distcont:
        if name not in ['frechet_l', 'frechet_r']:
            loc, scale = 0, 1
            names.append(name)
            params = list(params) + [loc, scale]
            
            # Create instance of random variable
            dist = getattr(stats, name)
            
            # Create frozen random variable using parameters and add it to the list
            # to be used to draw the probability density functions
            rv = dist(*params)
            frozen_rvs.append(rv)
            
            # Create sample of random variates
            samples.append(rv.rvs(size=size))
            
            # Create x label containing the distribution parameters
            p_names = ['loc', 'scale']
            if dist.shapes:
                p_names = [sh.strip() for sh in dist.shapes.split(',')] + ['loc', 'scale']
            xlabels.append(', '.join([f'{pn}={pv:.2f}' for pn, pv in zip(p_names, params)]))
    
    # Create pandas dataframe containing all the samples
    df = pd.DataFrame(data=np.array(samples).T, columns=[name for name in names])
    # Rename the duplicate column names by adding a period and an integer at the end
    df.columns = pd.io.parsers.ParserBase({'names':df.columns})._maybe_dedup_names(df.columns)
    
    Run Code Online (Sandbox Code Playgroud)
    df.head()
    
    #       alpha     anglit    arcsine  ...  weibull_max  weibull_min   wrapcauchy
    # 0  0.327165   0.166185   0.018339  ...    -0.928914     0.359808     4.454122
    # 1  0.241819   0.373590   0.630670  ...    -0.733157     0.479574     2.778336
    # 2  0.231489   0.352024   0.457251  ...    -0.580317     1.312468     4.932825
    # 3  0.290551  -0.133986   0.797215  ...    -0.954856     0.341515     3.874536
    # 4  0.334494  -0.353015   0.439837  ...    -1.440794     0.498514     5.195171
    
    Run Code Online (Sandbox Code Playgroud)
    # Set parameters for figure dimensions
    nplot = df.columns.size
    cols = 3
    rows = int(np.ceil(nplot/cols))
    subp_w = 10/cols  # 10 corresponds to the figure width in inches
    subp_h = 0.9*subp_w
    
    # Create pandas grid of histograms
    axs = df.hist(density=True, bins=15, grid=False, edgecolor='w',
                  linewidth=0.5, legend=False,
                  layout=(rows, cols), figsize=(cols*subp_w, rows*subp_h))
    
    # Loop over subplots to draw probability density function and apply some
    # additional formatting
    for idx, ax in enumerate(axs.flat[:df.columns.size]):
        rv = frozen_rvs[idx]
        x = np.linspace(rv.ppf(0.001), rv.ppf(0.999), size)
        ax.plot(x, rv.pdf(x), c='black', alpha=0.5)
        ax.set_title(ax.get_title(), pad=25)
        ax.set_xlim(x.min(), x.max())
        ax.set_xlabel(xlabels[idx], fontsize=8, labelpad=10)
        ax.xaxis.set_label_position('top')
        ax.tick_params(axis='both', labelsize=9)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
    
    ax.figure.subplots_adjust(hspace=0.8, wspace=0.3)
    
    Run Code Online (Sandbox Code Playgroud)

    scipy_distributions