我正在寻找我的功能的快速替代品.目标是根据任意长度整数制作32位整数列表.长度在(value,bitlength)元组中明确给出.这是异步接口的bit-banging过程的一部分,每个总线事务需要4个32位整数.
所有整数都是无符号,正数或零,长度可以在0到2000之间变化
我的输入是这些元组的列表,输出应该是具有隐式32位长度的整数,其中的位按顺序排列.还应返回不适合32的其余位.
input: [(0,128),(1,12),(0,32)]
output:[0, 0, 0, 0, 0x100000], 0, 12
Run Code Online (Sandbox Code Playgroud)
我花了一两天的时间用cProfile进行分析,并尝试了不同的方法,但我似乎有点坚持使用一秒钟内需要~100k元组的函数,这有点慢.理想情况下,我想要10倍的加速,但我没有足够的经验知道从哪里开始.速度的最终目标是速度超过每秒4M元组.
感谢您的任何帮助或建议.
我能做的最快的是:
def foo(tuples):
'''make a list of tuples of (int, length) into a list of 32 bit integers [1,2,3]'''
length = 0
remlen = 0
remint = 0
i32list = []
for a, b in tuples:
n = (remint << (32-remlen)) | a #n = (a << (remlen)) | remint
length += b
if length > 32:
len32 = int(length/32)
for i in range(len32):
i32list.append((n >> i*32) & 0xFFFFFFFF)
remint = n >> (len32*32)
remlen = length - len32*32
length = remlen
elif length == 32:
appint = n & 0xFFFFFFFF
remint = 0
remlen = 0
length -= 32
i32list.append(appint)
else:
remint = n
remlen = length
return i32list, remint, remlen
Run Code Online (Sandbox Code Playgroud)
这有非常相似的表现:
def tpli_2_32ili(tuples):
'''make a list of tuples of (int, length) into a list of 32 bit integers [1,2,3]'''
# binarylist = "".join([np.binary_repr(a, b) for a, b in inp]) # bin(a)[2:].rjust(b, '0')
binarylist = "".join([bin(a)[2:].rjust(b, '0') for a, b in tuples])
totallength = len(binarylist)
tot32 = int(totallength/32)
i32list = [int(binarylist[i:i+32],2) for i in range(0, tot32*32, 32) ]
remlen = totallength - tot32*32
remint = int(binarylist[-remlen:],2)
return i32list, remint, remlen
Run Code Online (Sandbox Code Playgroud)
这不是一个更快实施的答案。相反,您在问题中放置的两个片段中的代码放置在可扩展的基准测试框架中,这使得比较不同的方法变得非常容易。
根据显示的输出,仅比较这两个测试用例,表明您的第二种方法与第一种方法的性能不太相似。事实上,速度几乎慢了一倍。
from collections import namedtuple
import sys
from textwrap import dedent
import timeit
import traceback
N = 1000 # Number of executions of each "algorithm".
R = 3 # Number of repetitions of those N executions.
# Common setup for all testcases (executed before any algorithm specific setup).
COMMON_SETUP = dedent("""
# Import any resources needed defined in outer benchmarking script.
#from __main__ import ??? # Not needed at this time
""")
class TestCase(namedtuple('CodeFragments', ['setup', 'test'])):
""" A test case is composed of separate setup and test code fragments. """
def __new__(cls, setup, test):
""" Dedent code fragment in each string argument. """
return tuple.__new__(cls, (dedent(setup), dedent(test)))
testcases = {
"First snippet": TestCase("""
def foo(tuples):
'''make a list of tuples of (int, length) into a list of 32 bit integers [1,2,3]'''
length = 0
remlen = 0
remint = 0
i32list = []
for a, b in tuples:
n = (remint << (32-remlen)) | a #n = (a << (remlen)) | remint
length += b
if length > 32:
len32 = int(length/32)
for i in range(len32):
i32list.append((n >> i*32) & 0xFFFFFFFF)
remint = n >> (len32*32)
remlen = length - len32*32
length = remlen
elif length == 32:
appint = n & 0xFFFFFFFF
remint = 0
remlen = 0
length -= 32
i32list.append(appint)
else:
remint = n
remlen = length
return i32list, remint, remlen
""", """
foo([(0,128),(1,12),(0,32)])
"""
),
"Second snippet": TestCase("""
def tpli_2_32ili(tuples):
'''make a list of tuples of (int, length) into a list of 32 bit integers [1,2,3]'''
binarylist = "".join([bin(a)[2:].rjust(b, '0') for a, b in tuples])
totallength = len(binarylist)
tot32 = int(totallength/32)
i32list = [int(binarylist[i:i+32],2) for i in range(0, tot32*32, 32) ]
remlen = totallength - tot32*32
remint = int(binarylist[-remlen:],2)
return i32list, remint, remlen
""", """
tpli_2_32ili([(0,128),(1,12),(0,32)])
"""
),
}
# Collect timing results of executing each testcase multiple times.
try:
results = [
(label,
min(timeit.repeat(testcases[label].test,
setup=COMMON_SETUP + testcases[label].setup,
repeat=R, number=N)),
) for label in testcases
]
except Exception:
traceback.print_exc(file=sys.stdout) # direct output to stdout
sys.exit(1)
# Display results.
major, minor, micro = sys.version_info[:3]
print('Fastest to slowest execution speeds using Python {}.{}.{}\n'
'({:,d} executions, best of {:d} repetitions)'.format(major, minor, micro, N, R))
print()
longest = max(len(result[0]) for result in results) # length of longest label
ranked = sorted(results, key=lambda t: t[1]) # ascending sort by execution time
fastest = ranked[0][1]
for result in ranked:
print('{:>{width}} : {:9.6f} secs, rel speed {:5,.2f}x, {:8,.2f}% slower '
''.format(
result[0], result[1], round(result[1]/fastest, 2),
round((result[1]/fastest - 1) * 100, 2),
width=longest))
Run Code Online (Sandbox Code Playgroud)
输出:
from collections import namedtuple
import sys
from textwrap import dedent
import timeit
import traceback
N = 1000 # Number of executions of each "algorithm".
R = 3 # Number of repetitions of those N executions.
# Common setup for all testcases (executed before any algorithm specific setup).
COMMON_SETUP = dedent("""
# Import any resources needed defined in outer benchmarking script.
#from __main__ import ??? # Not needed at this time
""")
class TestCase(namedtuple('CodeFragments', ['setup', 'test'])):
""" A test case is composed of separate setup and test code fragments. """
def __new__(cls, setup, test):
""" Dedent code fragment in each string argument. """
return tuple.__new__(cls, (dedent(setup), dedent(test)))
testcases = {
"First snippet": TestCase("""
def foo(tuples):
'''make a list of tuples of (int, length) into a list of 32 bit integers [1,2,3]'''
length = 0
remlen = 0
remint = 0
i32list = []
for a, b in tuples:
n = (remint << (32-remlen)) | a #n = (a << (remlen)) | remint
length += b
if length > 32:
len32 = int(length/32)
for i in range(len32):
i32list.append((n >> i*32) & 0xFFFFFFFF)
remint = n >> (len32*32)
remlen = length - len32*32
length = remlen
elif length == 32:
appint = n & 0xFFFFFFFF
remint = 0
remlen = 0
length -= 32
i32list.append(appint)
else:
remint = n
remlen = length
return i32list, remint, remlen
""", """
foo([(0,128),(1,12),(0,32)])
"""
),
"Second snippet": TestCase("""
def tpli_2_32ili(tuples):
'''make a list of tuples of (int, length) into a list of 32 bit integers [1,2,3]'''
binarylist = "".join([bin(a)[2:].rjust(b, '0') for a, b in tuples])
totallength = len(binarylist)
tot32 = int(totallength/32)
i32list = [int(binarylist[i:i+32],2) for i in range(0, tot32*32, 32) ]
remlen = totallength - tot32*32
remint = int(binarylist[-remlen:],2)
return i32list, remint, remlen
""", """
tpli_2_32ili([(0,128),(1,12),(0,32)])
"""
),
}
# Collect timing results of executing each testcase multiple times.
try:
results = [
(label,
min(timeit.repeat(testcases[label].test,
setup=COMMON_SETUP + testcases[label].setup,
repeat=R, number=N)),
) for label in testcases
]
except Exception:
traceback.print_exc(file=sys.stdout) # direct output to stdout
sys.exit(1)
# Display results.
major, minor, micro = sys.version_info[:3]
print('Fastest to slowest execution speeds using Python {}.{}.{}\n'
'({:,d} executions, best of {:d} repetitions)'.format(major, minor, micro, N, R))
print()
longest = max(len(result[0]) for result in results) # length of longest label
ranked = sorted(results, key=lambda t: t[1]) # ascending sort by execution time
fastest = ranked[0][1]
for result in ranked:
print('{:>{width}} : {:9.6f} secs, rel speed {:5,.2f}x, {:8,.2f}% slower '
''.format(
result[0], result[1], round(result[1]/fastest, 2),
round((result[1]/fastest - 1) * 100, 2),
width=longest))
Run Code Online (Sandbox Code Playgroud)