【发布时间】:2017-09-29 10:56:13
【问题描述】:
UPDATE(反映最先进的知识水平)状态:2017-05-12
这次更新的原因是,当我问这个问题时,我并不知道我发现了一些关于 Python3 如何“在幕后”工作的东西。
所有接下来的结论是:
如果您为迭代器编写自己的 Python3 代码并关心执行速度,则应将其编写为生成器函数,而不是迭代器类。
下面是一个简约的代码示例,它展示了相同的算法(此处:Pythons 的自制版本range()) 表示为生成器函数比表示为迭代器类运行得快得多:
def gnrtYieldRange(startWith, endAt, step=1):
while startWith <= endAt:
yield startWith
startWith += step
class iterClassRange:
def __init__(self, startWith, endAt, step=1):
self.startWith = startWith - 1
self.endAt = endAt
self.step = step
def __iter__(self):
return self
def __next__(self):
self.startWith += self.step
if self.startWith <= self.endAt:
return self.startWith
else:
raise StopIteration
N = 10000000
print(" Size of created list N = {} elements (ints 1 to N)".format(N))
from time import time as t
from customRange import gnrtYieldRange as cthnYieldRange
from customRange import cintYieldRange
from customRange import iterClassRange as cthnClassRange
from customRange import cdefClassRange
iterPythnRangeObj = range(1, N+1)
gnrtYieldRangeObj = gnrtYieldRange(1, N)
cthnYieldRangeObj = cthnYieldRange(1, N)
cintYieldRangeObj = cintYieldRange(1, N)
iterClassRangeObj = iterClassRange(1, N)
cthnClassRangeObj = cthnClassRange(1, N)
cdefClassRangeObj = cdefClassRange(1, N)
sEXECs = [
"liPR = list(iterPythnRangeObj)",
"lgYR = list(gnrtYieldRangeObj)",
"lcYR = list(cthnYieldRangeObj)",
"liGR = list(cintYieldRangeObj)",
"liCR = list(iterClassRangeObj)",
"lcCR = list(cthnClassRangeObj)",
"ldCR = list(cdefClassRangeObj)"
]
sCOMMENTs = [
"Python3 own range(1, N+1) used here as reference for timings ",
"self-made range generator function using yield (run as it is) ",
"self-made range (with yield) run from module created by Cython",
"Cython-optimized self-made range (using yield) run from module",
"self-made range as iterator class using __next__() and return ",
"self-made range (using __next__) from module created by Cython",
"Cython-optimized self-made range (using __next__) from module "
]
for idx, sEXEC in enumerate(sEXECs):
s=t();exec(sEXEC);e=t();print("{} takes: {:3.1f} sec.".format(sCOMMENTs[idx], e-s))
print("All created lists are equal:", all([liPR == lgYR, lgYR == lcYR, lcYR == liGR, liGR == liCR, liCR == lcCR, lcCR == ldCR]) )
print("Run on Linux Mint 18.1, used Cython.__version__ == '0.25.2'")
上面的代码放入一个文件并运行打印到标准输出:
>python3.6 -u "gnrtFunction-fasterThan-iterClass_runMe.py"
Size of created list N = 10000000 elements (ints 1 to N)
Python3 own range(1, N+1) used here as reference for timings takes: 0.2 sec.
self-made range generator function using yield (run as it is) takes: 1.1 sec.
self-made range (with yield) run from module created by Cython takes: 0.5 sec.
Cython-optimized self-made range (using yield) run from module takes: 0.3 sec.
self-made range as iterator class using __next__() and return takes: 3.9 sec.
self-made range (using __next__) from module created by Cython takes: 3.3 sec.
Cython-optimized self-made range (using __next__) from module takes: 0.2 sec.
All created lists are equal: True
Run on Linux Mint 18.1, used Cython.__version__ == '0.25.2'
>Exit code: 0
从上面的时序可以看出,自制range() 迭代器的生成器函数变体比迭代器类变体运行得更快,并且当不涉及代码优化时,这种行为也会传播到 C 的 C 代码级别-Cython 创建的代码。
如果您想知道为什么会这样详细说明,您可以通读提供的答案或自己玩一下提供的代码。
在运行上述代码所需的缺失代码片段下方:
customRange.pyx - Cython 文件从以下位置创建 customRange 模块:
def gnrtYieldRange(startWith, endAt, step=1):
while startWith <= endAt:
yield startWith
startWith += step
class iterClassRange:
def __init__(self, startWith, endAt, step=1):
self.startWith = startWith - 1
self.endAt = endAt
self.step = step
def __iter__(self):
return self
def __next__(self):
self.startWith += self.step
if self.startWith <= self.endAt:
return self.startWith
else:
raise StopIteration
def cintYieldRange(int startWith, int endAt, int step=1):
while startWith <= endAt:
yield startWith
startWith += step
cdef class cdefClassRange:
cdef int startWith
cdef int endAt
cdef int step
def __init__(self, int startWith, int endAt, int step=1):
self.startWith = startWith - 1
self.endAt = endAt
self.step = step
def __iter__(self):
return self
def __next__(self):
self.startWith += self.step
if self.startWith <= self.endAt:
return self.startWith
else:
raise StopIteration
以及用于创建 Python customRange 模块的设置文件 customRange-setup.py:
import sys
sys.argv += ['build_ext', '--inplace']
from distutils.core import setup
from Cython.Build import cythonize
setup(
name = 'customRange',
ext_modules = cythonize("customRange.pyx"),
)
现在一些进一步的信息可以更容易地理解所提供的答案:
在我提出这个问题的时候,我正忙于一个相当复杂的问题
使用yield从生成器函数形式可用的非唯一列表生成唯一组合的算法。我的目标是使用该算法创建一个用 C 语言编写的 Python 模块,以使其运行得更快。为此,我将使用yield 的生成器函数重写为使用__next__() 和return 的迭代器类。当我比较算法的两种变体的速度时,我很惊讶迭代器类比生成器函数慢两倍,并且我(错误地)假设它与我的方式有关重写了算法(如果你想更好地理解这里的答案是关于什么的,你需要知道这一点),因此
原问如何让迭代器类版本和生成器函数运行速度相同,速度差异从何而来?。
下面是有关问题历史的更多信息:
在下面提供的 Python 脚本代码中,使用 Python function 和 yield 以及使用 class 和 __next__ 实现了从非唯一元素列表创建唯一组合的完全相同的算法。代码在复制/粘贴之后就可以运行了,所以你可以自己看看我在说什么。
在纯 Python 代码中观察到的相同现象会传播到由 Cython 从脚本代码创建的 Python 扩展模块的 C 代码中,因此它不限于 Python 级别的代码,因为它不会在C 代码级别。
问题是:
执行速度的巨大差异从何而来? 有什么办法可以让两种代码变体以相当的速度运行?与函数/产量变体相比,类/下一个实现是否有问题?据我所知,两者都是完全相同的代码...
这里的代码(调整突出显示行中的数字会改变列表中元素的唯一性级别,生成的组合对运行时间有巨大影响):
def uniqCmboYieldIter(lstItems, lenCmbo):
dctCounter = {}
lenLstItems = len(lstItems)
for idx in range(lenLstItems):
item = lstItems[idx]
if item in dctCounter.keys():
dctCounter[item] += 1
else:
dctCounter[item] = 1
#:if
#:for
lstUniqs = sorted(dctCounter.keys())
lstCntRpts = [dctCounter[item] for item in lstUniqs]
lenUniqs = len(lstUniqs)
cmboAsIdxUniqs = [None] * lenCmbo
multiplicities = [0] * lenUniqs
idxIntoCmbo, idxIntoUniqs = 0, 0
while idxIntoCmbo != lenCmbo and idxIntoUniqs != lenUniqs:
count = min(lstCntRpts[idxIntoUniqs], lenCmbo-idxIntoCmbo)
cmboAsIdxUniqs[idxIntoCmbo : idxIntoCmbo + count] = [idxIntoUniqs] * count
multiplicities[idxIntoUniqs] = count
idxIntoCmbo += count
idxIntoUniqs += 1
if idxIntoCmbo != lenCmbo:
return
while True:
yield tuple(lstUniqs[idxUniqs] for idxUniqs in cmboAsIdxUniqs)
for idxIntoCmbo in reversed(range(lenCmbo)):
x = cmboAsIdxUniqs[idxIntoCmbo]
y = x + 1
if y < lenUniqs and multiplicities[y] < lstCntRpts[y]:
break
else:
return
for idxIntoCmbo in range(idxIntoCmbo, lenCmbo):
x = cmboAsIdxUniqs[idxIntoCmbo]
cmboAsIdxUniqs[idxIntoCmbo] = y
multiplicities[x] -= 1
multiplicities[y] += 1
# print("# multiplicities:", multiplicities)
while y != lenUniqs and multiplicities[y] == lstCntRpts[y]:
y += 1
if y == lenUniqs:
break
class uniqCmboClassIter:
# ----------------------------------------------------------------------------------------------
def __iter__(self):
return self
# ----------------------------------------------------------------------------------------------
def __init__(self, lstItems, lenCmbo):
dctCounter = {}
lenLstItems = len(lstItems)
for idx in range(lenLstItems):
item = lstItems[idx]
if item in dctCounter.keys():
dctCounter[item] += 1
else:
dctCounter[item] = 1
#:if
#:for
self.lstUniqs = sorted(dctCounter.keys())
self.lenUniqs = len(self.lstUniqs)
self.lstCntRpts = [dctCounter[item] for item in self.lstUniqs]
self.lenCmbo = lenCmbo
self.cmboAsIdxUniqs = [None] * lenCmbo
self.multiplicities = [0] * self.lenUniqs
self.idxIntoCmbo, self.idxIntoUniqs = 0, 0
while self.idxIntoCmbo != self.lenCmbo and self.idxIntoUniqs != self.lenUniqs:
count = min(self.lstCntRpts[self.idxIntoUniqs], self.lenCmbo-self.idxIntoCmbo)
self.cmboAsIdxUniqs[self.idxIntoCmbo : self.idxIntoCmbo + count] = [self.idxIntoUniqs] * count
self.multiplicities[self.idxIntoUniqs] = count
self.idxIntoCmbo += count
self.idxIntoUniqs += 1
# print("self.multiplicities:", self.multiplicities)
# print("self.cmboAsIdxUniqs:", self.cmboAsIdxUniqs)
if self.idxIntoCmbo != self.lenCmbo:
return
self.stopIteration = False
self.x = None
self.y = None
return
# ----------------------------------------------------------------------------------------------
def __next__(self):
if self.stopIteration is True:
raise StopIteration
return
nextCmbo = tuple(self.lstUniqs[idxUniqs] for idxUniqs in self.cmboAsIdxUniqs)
for self.idxIntoCmbo in reversed(range(self.lenCmbo)):
self.x = self.cmboAsIdxUniqs[self.idxIntoCmbo]
self.y = self.x + 1
if self.y < self.lenUniqs and self.multiplicities[self.y] < self.lstCntRpts[self.y]:
break
else:
self.stopIteration = True
return nextCmbo
for self.idxIntoCmbo in range(self.idxIntoCmbo, self.lenCmbo):
self.x = self.cmboAsIdxUniqs[self.idxIntoCmbo]
self.cmboAsIdxUniqs[self.idxIntoCmbo] = self.y
self.multiplicities[self.x] -= 1
self.multiplicities[self.y] += 1
# print("# multiplicities:", multiplicities)
while self.y != self.lenUniqs and self.multiplicities[self.y] == self.lstCntRpts[self.y]:
self.y += 1
if self.y == self.lenUniqs:
break
return nextCmbo
# ============================================================================================================================================
lstSize = 48 # 48
uniqLevel = 12 # (7 ~60% unique) higher level => more unique items in the generated list
aList = []
from random import randint
for _ in range(lstSize):
aList.append( ( randint(1,uniqLevel), randint(1,uniqLevel) ) )
lenCmbo = 6
percUnique = 100.0 - 100.0*(lstSize-len(set(aList)))/lstSize
print("======================== lenCmbo:", lenCmbo,
" sizeOfList:", len(aList),
" noOfUniqueInList", len(set(aList)),
" percUnique", int(percUnique) )
import time
from itertools import combinations
# itertools.combinations
# ---
# def uniqCmboYieldIter(lstItems, lenCmbo):
# class uniqCmboClassIter: def __init__(self, lstItems, lenCmbo):
# ---
start_time = time.time()
print("Combos:%9i"%len(list(combinations(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list( combinations(aList, lenCmbo)))):", "{:9.5f}".format(duration), "seconds.")
start_time = time.time()
print("Combos:%9i"%len(list(uniqCmboYieldIter(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list(uniqCmboYieldIter(aList, lenCmbo)))):", "{:9.5f}".format(duration), "seconds.")
start_time = time.time()
print("Combos:%9i"%len(list(uniqCmboClassIter(aList, lenCmbo))), " ", end='')
duration = time.time() - start_time
print("print(len(list(uniqCmboClassIter(aList, lenCmbo)))):", "{:9.5f}".format(duration), "seconds.")
还有我盒子上的时间:
>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
======================== lenCmbo: 6 sizeOfList: 48 noOfUniqueInList 32 percUnique 66
Combos: 12271512 print(len(list( combinations(aList, lenCmbo)))): 2.04635 seconds.
Combos: 1296058 print(len(list(uniqCmboYieldIter(aList, lenCmbo)))): 3.25447 seconds.
Combos: 1296058 print(len(list(uniqCmboClassIter(aList, lenCmbo)))): 5.97371 seconds.
>Exit code: 0
[2017-05-02_03:23] 207474 <-Chrs,Keys-> 1277194 OnSave(): '/home/claudio/CgMint18/_Cg.DIR/ClaudioOnline/at-stackoverflow/bySubject/uniqueCombinations/nonRecursiveUniqueCombos_Cg.py'
>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
======================== lenCmbo: 6 sizeOfList: 48 noOfUniqueInList 22 percUnique 45
Combos: 12271512 print(len(list( combinations(aList, lenCmbo)))): 2.05199 seconds.
Combos: 191072 print(len(list(uniqCmboYieldIter(aList, lenCmbo)))): 0.47343 seconds.
Combos: 191072 print(len(list(uniqCmboClassIter(aList, lenCmbo)))): 0.89860 seconds.
>Exit code: 0
[2017-05-02_03:23] 207476 <-Chrs,Keys-> 1277202 OnSave(): '/home/claudio/CgMint18/_Cg.DIR/ClaudioOnline/at-stackoverflow/bySubject/uniqueCombinations/nonRecursiveUniqueCombos_Cg.py'
>python3.6 -u "nonRecursiveUniqueCombos_Cg.py"
======================== lenCmbo: 6 sizeOfList: 48 noOfUniqueInList 43 percUnique 89
Combos: 12271512 print(len(list( combinations(aList, lenCmbo)))): 2.17285 seconds.
Combos: 6560701 print(len(list(uniqCmboYieldIter(aList, lenCmbo)))): 16.72573 seconds.
Combos: 6560701 print(len(list(uniqCmboClassIter(aList, lenCmbo)))): 31.17714 seconds.
>Exit code: 0
更新(状态 2017-05-07):
在提出问题并提供赏金时,我不知道有一种方法可以轻松地使用 Cython 从 Python 脚本代码中为迭代器对象创建扩展模块的 C 代码,并且这样的 C 代码也可以使用
yield从迭代器函数创建。
考虑到生成的更快版本的 C 扩展模块仍然不够快,无法与 itertools.combinations 竞争一个迭代器函数以及如何克服这个问题。找到一种使用 Cython 加速更快版本的方法更有意义,特别是因为我是编写 Python 扩展模块的新手的itertools.combinations 进行了自己的修改,因为Segmentation Fault 错误我无法理解其原因。
目前我认为我使用的 Cython 代码仍有加速的空间,无需自己编写 C 代码。
在运行正常的 Cython 代码和速度优化的 Cython 代码下方,该代码以某种方式改变(我目前看不到原因)算法的工作方式并因此产生错误的结果。 Cython 优化背后的想法是在 Cython 代码中使用 Python/Cython 数组而不是 Python 列表。欢迎任何提示如何以新手“安全”的方式从使用的算法中获得更快运行的 Python 扩展模块。
def subbags_by_loops_with_dict_counter(lstItems, int lenCmbo):
dctCounter = {}
cdef int lenLstItems = len(lstItems)
cdef int idx = 0
for idx in range(lenLstItems):
item = lstItems[idx]
if item in dctCounter.keys():
dctCounter[item] += 1
else:
dctCounter[item] = 1
#:if
#:for
lstUniqs = sorted(dctCounter.keys())
lstCntRpts = [dctCounter[item] for item in lstUniqs]
cdef int lenUniqs = len(lstUniqs)
cmboAsIdxUniqs = [None] * lenCmbo
multiplicities = [0] * lenUniqs
cdef int idxIntoCmbo
cdef int idxIntoUniqs
cdef int count
while idxIntoCmbo != lenCmbo and idxIntoUniqs != lenUniqs:
count = min(lstCntRpts[idxIntoUniqs], lenCmbo-idxIntoCmbo)
cmboAsIdxUniqs[idxIntoCmbo : idxIntoCmbo + count] = [idxIntoUniqs] * count
multiplicities[idxIntoUniqs] = count
idxIntoCmbo += count
idxIntoUniqs += 1
if idxIntoCmbo != lenCmbo:
return
cdef int x
cdef int y
while True:
yield tuple(lstUniqs[idxUniqs] for idxUniqs in cmboAsIdxUniqs)
for idxIntoCmbo in reversed(range(lenCmbo)):
x = cmboAsIdxUniqs[idxIntoCmbo]
y = x + 1
if y < lenUniqs and multiplicities[y] < lstCntRpts[y]:
break
else:
return
for idxIntoCmbo in range(idxIntoCmbo, lenCmbo):
x = cmboAsIdxUniqs[idxIntoCmbo]
cmboAsIdxUniqs[idxIntoCmbo] = y
multiplicities[x] -= 1
multiplicities[y] += 1
while y != lenUniqs and multiplicities[y] == lstCntRpts[y]:
y += 1
if y == lenUniqs:
break
以下优化的 CYTHON 代码会产生错误的结果:
def subbags_loops_dict_cython_optimized(lstItems, int lenCmbo):
dctCounter = {}
cdef int lenLstItems = len(lstItems)
cdef int idx = 0
for idx in range(lenLstItems):
item = lstItems[idx]
if item in dctCounter.keys():
dctCounter[item] += 1
else:
dctCounter[item] = 1
#:if
#:for
lstUniqs = sorted(dctCounter.keys())
lstCntRpts = [dctCounter[item] for item in lstUniqs]
cdef int lenUniqs = len(lstUniqs)
cdef array.array cmboAsIdxUniqs = array.array('i', [])
array.resize(cmboAsIdxUniqs, lenCmbo)
# cmboAsIdxUniqs = [None] * lenCmbo
cdef array.array multiplicities = array.array('i', [])
array.resize(multiplicities, lenUniqs)
# multiplicities = [0] * lenUniqs
cdef int idxIntoCmbo
cdef int maxIdxCmbo
cdef int curIdxCmbo
cdef int idxIntoUniqs
cdef int count
while idxIntoCmbo != lenCmbo and idxIntoUniqs != lenUniqs:
count = min(lstCntRpts[idxIntoUniqs], lenCmbo-idxIntoCmbo)
maxIdxCmbo = idxIntoCmbo + count
curIdxCmbo = idxIntoCmbo
while curIdxCmbo < maxIdxCmbo:
cmboAsIdxUniqs[curIdxCmbo] = idxIntoUniqs
curIdxCmbo += 1
multiplicities[idxIntoUniqs] = count
idxIntoCmbo += count
idxIntoUniqs += 1
# print("multiplicities:", multiplicities)
# print("cmboAsIdxUniqs:", cmboAsIdxUniqs)
if idxIntoCmbo != lenCmbo:
return
cdef int x
cdef int y
while True:
yield tuple(lstUniqs[idxUniqs] for idxUniqs in cmboAsIdxUniqs)
for idxIntoCmbo in reversed(range(lenCmbo)):
x = cmboAsIdxUniqs[idxIntoCmbo]
y = x + 1
if y < lenUniqs and multiplicities[y] < lstCntRpts[y]:
break
else:
return
for idxIntoCmbo in range(idxIntoCmbo, lenCmbo):
x = cmboAsIdxUniqs[idxIntoCmbo]
cmboAsIdxUniqs[idxIntoCmbo] = y
multiplicities[x] -= 1
multiplicities[y] += 1
# print("# multiplicities:", multiplicities)
while y != lenUniqs and multiplicities[y] == lstCntRpts[y]:
y += 1
if y == lenUniqs:
break
【问题讨论】:
标签: performance python-3.x iterator generator yield