4 python math optimization performance
我写了这个非常优化的C代码,它做了一个简单的数学计算:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
unsigned long long int p(int);
float fullCheck(int);
int main(int argc, char **argv){
int i, g, maxNumber;
unsigned long long int diff = 1000;
if(argc < 2){
fprintf(stderr, "Usage: %s maxNumber\n", argv[0]);
return 0;
}
maxNumber = atoi(argv[1]);
for(i = 1; i < maxNumber; i++){
for(g = 1; g < maxNumber; g++){
if(i == g)
continue;
if(p(MAX(i,g)) - p(MIN(i,g)) < diff && fullCheck(p(MAX(i,g)) - p(MIN(i,g))) && fullCheck(p(i) + p(g))){
diff = p(MAX(i,g)) - p(MIN(i,g));
printf("We have a couple %llu %llu with diff %llu\n", p(i), p(g), diff);
}
}
}
return 0;
}
float fullCheck(int number){
float check = (-1 + sqrt(1 + 24 * number))/-6;
float check2 = (-1 - sqrt(1 + 24 * number))/-6;
if(check/1.00 == (int)check)
return check;
if(check2/1.00 == (int)check2)
return check2;
return 0;
}
unsigned long long int p(int n){
return n * (3 * n - 1 ) / 2;
}
Run Code Online (Sandbox Code Playgroud)
然后我尝试(只是为了好玩)在Python下移植它,看看它会如何反应.我的第一个版本几乎是1:1转换,运行速度非常慢(Python中为120 +秒,C中为<1秒).我做了一些优化,这就是我获得的:
#!/usr/bin/env/python
from cmath import sqrt
import cProfile
from pstats import Stats
def quickCheck(n):
partial_c = (sqrt(1 + 24 * (n)))/-6
c = 1/6 + partial_c
if int(c.real) == c.real:
return True
c = c - 2*partial_c
if int(c.real) == c.real:
return True
return False
def main():
maxNumber = 5000
diff = 1000
for i in range(1, maxNumber):
p_i = i * (3 * i - 1 ) / 2
for g in range(i, maxNumber):
if i == g:
continue
p_g = g * (3 * g - 1 ) / 2
if p_i > p_g:
ma = p_i
mi = p_g
else:
ma = p_g
mi = p_i
if ma - mi < diff and quickCheck(ma - mi):
if quickCheck(ma + mi):
print ('New couple ', ma, mi)
diff = ma - mi
cProfile.run('main()','script_perf')
perf = Stats('script_perf').sort_stats('time', 'calls').print_stats(10)
Run Code Online (Sandbox Code Playgroud)
这比大约16秒运行更好,但也比C慢近20倍.现在,我知道C比这种计算更好于Python,但我想知道的是,如果有一些我错过的东西(Python - 像一个非常慢的功能或类似的东西,可以使这个功能更快.请注意,我正在使用Python 3.1.1,如果这有所不同
S.L*_*ott 17
由于quickCheck被调用接近25,000,000次,您可能希望使用memoization来缓存答案.
您可以在C和Python中进行memoization.C中的事情也会快得多.
您在1/6quickCheck的每次迭代中进行计算.我不确定这是否会被Python优化,但如果你可以避免重新计算常量值,你会发现事情更快.C编译器为您执行此操作.
做的事情if condition: return True; else: return False很愚蠢 - 而且很费时间.简单地做return condition.
在Python 3.x中,/2必须创建浮点值.你似乎需要整数.你应该使用//2师.就它的功能而言,它将更接近C版本,但我认为它的速度要快得多.
最后,Python通常被解释.解释器总是比C慢得多.
tru*_*ppo 10
我在机器上从大约7秒到大约3秒钟:
i * (3 * i - 1 ) / 2对于每个值进行预计算,在您的计算中,它计算了两次非常多if i == g通过在范围中添加+1来删除if p_i > p_g因为p_i 总是小于p_g还将quickCheck函数放在main中,使所有变量都是本地的(查找比全局更快).我相信还有更多的微优化可用.
def main():
maxNumber = 5000
diff = 1000
p = {}
quickCache = {}
for i in range(maxNumber):
p[i] = i * (3 * i - 1 ) / 2
def quickCheck(n):
if n in quickCache: return quickCache[n]
partial_c = (sqrt(1 + 24 * (n)))/-6
c = 1/6 + partial_c
if int(c.real) == c.real:
quickCache[n] = True
return True
c = c - 2*partial_c
if int(c.real) == c.real:
quickCache[n] = True
return True
quickCache[n] = False
return False
for i in range(1, maxNumber):
mi = p[i]
for g in range(i+1, maxNumber):
ma = p[g]
if ma - mi < diff and quickCheck(ma - mi) and quickCheck(ma + mi):
print('New couple ', ma, mi)
diff = ma - mi
Run Code Online (Sandbox Code Playgroud)
因为函数p()单调递增,所以可以避免比较这些值,因为g> i意味着p(g)> p(i).此外,内循环可以提前破坏,因为p(g) - p(i)> = diff意味着p(g + 1) - p(i)> = diff.
同样为了正确性,我在quickCheck中更改了相等比较以比较与epsilon的差异,因为与浮点的精确比较非常脆弱.
在我的机器上,使用Python 2.6将运行时间减少到7.8ms.使用PyPy和JIT将其减少到0.77ms.
这表明在转向微优化之前,寻找算法优化是值得的.微优化使得识别算法的变化更加难以获得相对微小的收益.
EPS = 0.00000001
def quickCheck(n):
partial_c = sqrt(1 + 24*n) / -6
c = 1/6 + partial_c
if abs(int(c) - c) < EPS:
return True
c = 1/6 - partial_c
if abs(int(c) - c) < EPS:
return True
return False
def p(i):
return i * (3 * i - 1 ) / 2
def main(maxNumber):
diff = 1000
for i in range(1, maxNumber):
for g in range(i+1, maxNumber):
if p(g) - p(i) >= diff:
break
if quickCheck(p(g) - p(i)) and quickCheck(p(g) + p(i)):
print('New couple ', p(g), p(i), p(g) - p(i))
diff = p(g) - p(i)
Run Code Online (Sandbox Code Playgroud)