【发布时间】:2021-03-08 06:31:33
【问题描述】:
问题
numpy/core/arrayprint.py 中的 recurser 是什么?它为什么会占用这么多时间?如能提供相关资料,不胜感激。
背景
注意到计算 softmax,即 exp(X) / sum(exp(X)),比较耗时,于是运行了分析器(cProfile)。
def softmax(X: Union[np.ndarray, float]) -> Union[np.ndarray, float]:
    """Return the softmax of ``X`` computed along its last axis.

    The maximum along the last axis is subtracted before exponentiating.
    This leaves the result mathematically unchanged but keeps ``np.exp``
    from overflowing for large inputs.

    Args:
        X: Input values; the reduction runs over ``axis=-1``.

    Returns:
        ``exp(X - max) / sum(exp(X - max))`` with the reduced axis kept
        (``keepdims=True``) so the division broadcasts against ``X``.
    """
    # Shift by the per-row maximum to prevent overflow in np.exp.
    C = np.max(X, axis=-1, keepdims=True)
    exp = np.exp(X - C)
    return exp / np.sum(exp, axis=-1, keepdims=True)
# Profile 1000 calls to softmax and print the collected statistics sorted
# by cumulative time.
profiler = cProfile.Profile()
profiler.enable()
for _ in range(1000):
    softmax(X)
profiler.disable()
profiler.print_stats(sort="cumtime")
显然,大部分时间都花在了 arrayprint.py 中,尤其是 recurser 函数。因此想知道 arrayprint 是什么,以及是否有办法提高性能。
129000/3000 0.335 0.000 1.106 0.000 arrayprint.py:718(recurser)
完整的分析器输出如下。
2419006 function calls (2275006 primitive calls) in 2.158 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1000 0.136 0.000 2.158 0.002 functions.py:173(softmax)
3000 0.006 0.000 1.966 0.001 arrayprint.py:1473(_array_str_implementation)
3000 0.013 0.000 1.960 0.001 arrayprint.py:516(array2string)
3000 0.013 0.000 1.926 0.001 arrayprint.py:461(wrapper)
3000 0.022 0.000 1.908 0.001 arrayprint.py:478(_array2string)
3000 0.005 0.000 1.111 0.000 arrayprint.py:709(_formatArray)
129000/3000 0.335 0.000 1.106 0.000 arrayprint.py:718(recurser)
3000 0.016 0.000 0.677 0.000 arrayprint.py:409(_get_format_function)
3000 0.005 0.000 0.651 0.000 arrayprint.py:366(<lambda>)
3000 0.012 0.000 0.646 0.000 arrayprint.py:836(__init__)
3000 0.112 0.000 0.632 0.000 arrayprint.py:863(fillFormat)
108000 0.368 0.000 0.588 0.000 arrayprint.py:947(__call__)
216000 0.395 0.000 0.395 0.000 {built-in method numpy.core._multiarray_umath.dragon4_positional}
111000 0.053 0.000 0.323 0.000 arrayprint.py:918(<genexpr>)
111000 0.075 0.000 0.249 0.000 arrayprint.py:913(<genexpr>)
126000 0.110 0.000 0.152 0.000 arrayprint.py:695(_extendLine)
17000 0.034 0.000 0.134 0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
24000 0.040 0.000 0.096 0.000 {built-in method builtins.max}
21000/3000 0.043 0.000 0.094 0.000 arrayprint.py:324(_leading_trailing)
8000 0.017 0.000 0.085 0.000 fromnumeric.py:70(_wrapreduction)
960000 0.078 0.000 0.078 0.000 {built-in method builtins.len}
4000 0.005 0.000 0.071 0.000 <__array_function__ internals>:2(amax)
8000 0.062 0.000 0.062 0.000 {method 'reduce' of 'numpy.ufunc' objects}
4000 0.008 0.000 0.062 0.000 fromnumeric.py:2589(amax)
9000 0.008 0.000 0.037 0.000 <__array_function__ internals>:2(concatenate)
6000 0.013 0.000 0.034 0.000 _ufunc_config.py:32(seterr)
111000 0.021 0.000 0.029 0.000 arrayprint.py:922(<genexpr>)
111000 0.020 0.000 0.027 0.000 arrayprint.py:923(<genexpr>)
3000 0.004 0.000 0.025 0.000 _ufunc_config.py:433(__enter__)
3000 0.003 0.000 0.025 0.000 <__array_function__ internals>:2(amin)
108000 0.021 0.000 0.021 0.000 {method 'split' of 'str' objects}
1000 0.002 0.000 0.021 0.000 <__array_function__ internals>:2(sum)
3000 0.004 0.000 0.020 0.000 fromnumeric.py:2714(amin)
3000 0.009 0.000 0.018 0.000 arrayprint.py:60(_make_options_dict)
1000 0.003 0.000 0.018 0.000 fromnumeric.py:2105(sum)
3000 0.003 0.000 0.015 0.000 _ufunc_config.py:438(__exit__)
6000 0.012 0.000 0.013 0.000 _ufunc_config.py:132(geterr)
18000 0.008 0.000 0.012 0.000 index_tricks.py:727(__getitem__)
3000 0.007 0.000 0.007 0.000 arrayprint.py:358(_get_formatdict)
3000 0.007 0.000 0.007 0.000 {built-in method builtins.locals}
27000 0.006 0.000 0.006 0.000 {method 'rstrip' of 'str' objects}
24000 0.006 0.000 0.006 0.000 {built-in method builtins.isinstance}
6000 0.006 0.000 0.006 0.000 {built-in method numpy.seterrobj}
8000 0.005 0.000 0.005 0.000 fromnumeric.py:71(<dictcomp>)
3000 0.002 0.000 0.004 0.000 _asarray.py:14(asarray)
12000 0.003 0.000 0.003 0.000 {built-in method numpy.geterrobj}
1000 0.002 0.000 0.003 0.000 __init__.py:1412(debug)
3000 0.002 0.000 0.002 0.000 arrayprint.py:65(<dictcomp>)
11000 0.002 0.000 0.002 0.000 {method 'items' of 'dict' objects}
3000 0.002 0.000 0.002 0.000 {built-in method numpy.array}
12000 0.002 0.000 0.002 0.000 {built-in method builtins.issubclass}
3000 0.002 0.000 0.002 0.000 {method 'update' of 'dict' objects}
9000 0.002 0.000 0.002 0.000 multiarray.py:143(concatenate)
3000 0.002 0.000 0.002 0.000 _ufunc_config.py:429(__init__)
3000 0.002 0.000 0.002 0.000 {method 'discard' of 'set' objects}
1000 0.001 0.000 0.001 0.000 __init__.py:1677(isEnabledFor)
3000 0.001 0.000 0.001 0.000 {built-in method builtins.id}
3000 0.001 0.000 0.001 0.000 {method 'add' of 'set' objects}
3000 0.001 0.000 0.001 0.000 arrayprint.py:827(_none_or_positive_arg)
3000 0.001 0.000 0.001 0.000 {built-in method _thread.get_ident}
3000 0.001 0.000 0.001 0.000 {method 'copy' of 'dict' objects}
4000 0.001 0.000 0.001 0.000 fromnumeric.py:2584(_amax_dispatcher)
3000 0.001 0.000 0.001 0.000 fromnumeric.py:2709(_amin_dispatcher)
1000 0.000 0.000 0.000 0.000 fromnumeric.py:2100(_sum_dispatcher)
1 0.000 0.000 0.000 0.000 __init__.py:214(_acquireLock)
1 0.000 0.000 0.000 0.000 __init__.py:223(_releaseLock)
1 0.000 0.000 0.000 0.000 __init__.py:1663(getEffectiveLevel)
1 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.RLock' objects}
1 0.000 0.000 0.000 0.000 {method 'release' of '_thread.RLock' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
python3.8/site-packages/numpy/core/arrayprint.py 中的 recurser 函数:
def _formatArray(a, format_function, line_width, next_line_prefix,
separator, edge_items, summary_insert, legacy):
"""formatArray is designed for two modes of operation:
1. Full output
2. Summarized output
"""
def recurser(index, hanging_indent, curr_width):
"""
By using this local function, we don't need to recurse with all the
arguments. Since this function is not created recursively, the cost is
not significant
"""
axis = len(index)
axes_left = a.ndim - axis
if axes_left == 0:
return format_function(a[index])
# when recursing, add a space to align with the [ added, and reduce the
# length of the line by 1
next_hanging_indent = hanging_indent + ' '
if legacy == '1.13':
next_width = curr_width
else:
next_width = curr_width - len(']')
a_len = a.shape[axis]
show_summary = summary_insert and 2*edge_items < a_len
if show_summary:
leading_items = edge_items
trailing_items = edge_items
else:
leading_items = 0
trailing_items = a_len
# stringify the array with the hanging indent on the first line too
s = ''
# last axis (rows) - wrap elements if they would not fit on one line
if axes_left == 1:
# the length up until the beginning of the separator / bracket
if legacy == '1.13':
elem_width = curr_width - len(separator.rstrip())
else:
elem_width = curr_width - max(len(separator.rstrip()), len(']'))
line = hanging_indent
for i in range(leading_items):
word = recurser(index + (i,), next_hanging_indent, next_width)
s, line = _extendLine(
s, line, word, elem_width, hanging_indent, legacy)
line += separator
if show_summary:
s, line = _extendLine(
s, line, summary_insert, elem_width, hanging_indent, legacy)
if legacy == '1.13':
line += ", "
else:
line += separator
for i in range(trailing_items, 1, -1):
word = recurser(index + (-i,), next_hanging_indent, next_width)
s, line = _extendLine(
s, line, word, elem_width, hanging_indent, legacy)
line += separator
if legacy == '1.13':
# width of the separator is not considered on 1.13
elem_width = curr_width
word = recurser(index + (-1,), next_hanging_indent, next_width)
s, line = _extendLine(
s, line, word, elem_width, hanging_indent, legacy)
s += line
# other axes - insert newlines between rows
else:
s = ''
line_sep = separator.rstrip() + '\n'*(axes_left - 1)
for i in range(leading_items):
nested = recurser(index + (i,), next_hanging_indent, next_width)
s += hanging_indent + nested + line_sep
if show_summary:
if legacy == '1.13':
# trailing space, fixed nbr of newlines, and fixed separator
s += hanging_indent + summary_insert + ", \n"
else:
s += hanging_indent + summary_insert + line_sep
for i in range(trailing_items, 1, -1):
nested = recurser(index + (-i,), next_hanging_indent,
next_width)
s += hanging_indent + nested + line_sep
nested = recurser(index + (-1,), next_hanging_indent, next_width)
s += hanging_indent + nested
# remove the hanging indent, and wrap in []
s = '[' + s[len(hanging_indent):] + ']'
return s
try:
# invoke the recursive part with an initial index and prefix
return recurser(index=(),
hanging_indent=next_line_prefix,
curr_width=line_width)
finally:
# recursive closures have a cyclic reference to themselves, which
# requires gc to collect (gh-10620). To avoid this problem, for
# performance and PyPy friendliness, we break the cycle:
recurser = None
更新
这是我自己的失误:我在 softmax 中放了一条非惰性求值(non-lazy)的日志语句,导致每次调用时数组都会先被格式化成字符串。
【问题讨论】:
标签: performance numpy