tor*_*rho 7 python random list
创建一个总和为X的随机向量(例如X = 1000)非常简单:
import random
def RunFloat():
Scalar = 1000
VectorSize = 30
RandomVector = [random.random() for i in range(VectorSize)]
RandomVectorSum = sum(RandomVector)
RandomVector = [Scalar*i/RandomVectorSum for i in RandomVector]
return RandomVector
RunFloat()
Run Code Online (Sandbox Code Playgroud)
上面的代码创建了一个向量,其值为浮点数,sum为1000.
我很难创建一个简单的函数来创建一个值为整数且和为X的向量(例如X = 1000*30)
import random
def RunInt():
LowerBound = 600
UpperBound = 1200
VectorSize = 30
RandomVector = [random.randint(LowerBound,UpperBound) for i in range(VectorSize)]
RandomVectorSum = 1000*30
#Sanity check that our RandomVectorSum is sensible/feasible
if LowerBound*VectorSize <= RandomVectorSum and RandomVectorSum <= UpperBound*VectorSum:
if sum(RandomVector) == RandomVectorSum:
return RandomVector
else:
RunInt()
Run Code Online (Sandbox Code Playgroud)
有没有人有任何改进这个想法的建议?我的代码可能永远不会完成或遇到递归深度问题.
感谢Oliver,mgilson和Dougal的投入.我的解决方案如下所示.
这是我为此编辑所做的代码:
我意识到正常分布没有正确实现,我已经将其修改为以下内容:
import random
def RandFloats(Size):
Scalar = 1.0
VectorSize = Size
RandomVector = [random.random() for i in range(VectorSize)]
RandomVectorSum = sum(RandomVector)
RandomVector = [Scalar*i/RandomVectorSum for i in RandomVector]
return RandomVector
from numpy.random import multinomial
import math
def RandIntVec(ListSize, ListSumValue, Distribution='Normal'):
"""
Inputs:
ListSize = the size of the list to return
ListSumValue = The sum of list values
Distribution = can be 'uniform' for uniform distribution, 'normal' for a normal distribution ~ N(0,1) with +/- 5 sigma (default), or a list of size 'ListSize' or 'ListSize - 1' for an empirical (arbitrary) distribution. Probabilities of each of the p different outcomes. These should sum to 1 (however, the last element is always assumed to account for the remaining probability, as long as sum(pvals[:-1]) <= 1).
Output:
A list of random integers of length 'ListSize' whose sum is 'ListSumValue'.
"""
if type(Distribution) == list:
DistributionSize = len(Distribution)
if ListSize == DistributionSize or (ListSize-1) == DistributionSize:
Values = multinomial(ListSumValue,Distribution,size=1)
OutputValue = Values[0]
elif Distribution.lower() == 'uniform': #I do not recommend this!!!! I see that it is not as random (at least on my computer) as I had hoped
UniformDistro = [1/ListSize for i in range(ListSize)]
Values = multinomial(ListSumValue,UniformDistro,size=1)
OutputValue = Values[0]
elif Distribution.lower() == 'normal':
"""
Normal Distribution Construction....It's very flexible and hideous
Assume a +-3 sigma range. Warning, this may or may not be a suitable range for your implementation!
If one wishes to explore a different range, then changes the LowSigma and HighSigma values
"""
LowSigma = -3#-3 sigma
HighSigma = 3#+3 sigma
StepSize = 1/(float(ListSize) - 1)
ZValues = [(LowSigma * (1-i*StepSize) +(i*StepSize)*HighSigma) for i in range(int(ListSize))]
#Construction parameters for N(Mean,Variance) - Default is N(0,1)
Mean = 0
Var = 1
#NormalDistro= [self.NormalDistributionFunction(Mean, Var, x) for x in ZValues]
NormalDistro= list()
for i in range(len(ZValues)):
if i==0:
ERFCVAL = 0.5 * math.erfc(-ZValues[i]/math.sqrt(2))
NormalDistro.append(ERFCVAL)
elif i == len(ZValues) - 1:
ERFCVAL = NormalDistro[0]
NormalDistro.append(ERFCVAL)
else:
ERFCVAL1 = 0.5 * math.erfc(-ZValues[i]/math.sqrt(2))
ERFCVAL2 = 0.5 * math.erfc(-ZValues[i-1]/math.sqrt(2))
ERFCVAL = ERFCVAL1 - ERFCVAL2
NormalDistro.append(ERFCVAL)
#print "Normal Distribution sum = %f"%sum(NormalDistro)
Values = multinomial(ListSumValue,NormalDistro,size=1)
OutputValue = Values[0]
else:
raise ValueError ('Cannot create desired vector')
return OutputValue
else:
raise ValueError ('Cannot create desired vector')
return OutputValue
#Some Examples
ListSize = 4
ListSumValue = 12
for i in range(100):
print RandIntVec(ListSize, ListSumValue,Distribution=RandFloats(ListSize))
Run Code Online (Sandbox Code Playgroud)
上面的代码可以在github上找到.这是我为学校建造的课程的一部分.user1149913,也发布了一个很好的解释问题.
我建议不要递归地执行此操作:
当您递归采样时,第一个索引中的值具有更大的可能范围,而后续索引中的值将受到第一个值的约束。这将产生类似于指数分布的结果。
相反,我建议从多项分布中采样。这将平等地对待每个索引,约束总和,强制所有值均为整数,并从遵循这些规则的所有可能配置中统一采样(注意:可以多种方式发生的配置将按它们可能发生的方式数量进行加权) )。
为了帮助将您的问题与多项式表示法合并,总和为 n(整数),因此每个 k 值(每个索引一个,也是整数)必须介于 0 和 n 之间。然后按照这里的食谱进行操作。
(或者使用numpy.random.multinomial作为@Dougal 有用的建议)。