如果 L 已排序,您可以使用 bisect.bisect_left 查找所有 L[= i] 的索引 i。
然后
if n - L[i-1] < 1.0:
val = L[i-1]
elif L[i] - n < 1.0:
val = L[i]
else:
val = None # no such value found
编辑:根据您的数据、您想要完成的任务以及您想要花费多少时间编写一个聪明的算法,排序可能会也可能不会为您提供良好的解决方案;在我看到更多的 O(n) 挥手之前,我想指出他的实际问题似乎涉及反复探索 n 的各种值——这将很快分摊初始排序开销——他建议上面的算法其实是O(n**2)。
@AntoinePelisse:无论如何,让我们做一些分析:
from bisect import bisect_left, bisect_right
from functools import partial
import matplotlib.pyplot as plt
from random import randint, uniform
from timeit import timeit
#blues
density_col_lin = [
(0.000, 0.502, 0.000, 1.000),
(0.176, 0.176, 0.600, 1.000),
(0.357, 0.357, 0.698, 1.000),
(0.537, 0.537, 0.800, 1.000)
]
# greens
density_col_sor = [
(0.000, 0.502, 0.000, 1.000),
(0.176, 0.600, 0.176, 1.000),
(0.357, 0.698, 0.357, 1.000),
(0.537, 0.800, 0.537, 1.000)
]
def make_data(length, density):
max_ = length / density
return [uniform(0.0, max_) for _ in range(length)], max_
def linear_probe(L, max_, probes):
for p in range(probes):
n = randint(0, int(max_))
for index,val in enumerate(L):
if n - 1.0 < val < n + 1.0:
# return index
break
def sorted_probe(L, max_, probes):
# initial sort
sL = sorted((val,index) for index,val in enumerate(L))
for p in range(probes):
n = randint(0, int(max_))
left = bisect_right(sL, (n - 1.0, max_))
right = bisect_left (sL, (n + 1.0, 0.0 ), left)
if left < right:
index = min(sL[left:right], key=lambda s:s[1])[1]
# return index
def main():
densities = [0.8, 0.2, 0.08, 0.02]
probes = [1, 3, 10, 30, 100]
lengths = [[] for d in densities]
lin_pts = [[[] for p in probes] for d in densities]
sor_pts = [[[] for p in probes] for d in densities]
# time each function at various data lengths, densities, and probe repetitions
for d,density in enumerate(densities):
for trial in range(200):
print("{}-{}".format(density, trial))
# length in 10 to 5000, with log density
length = int(10 ** uniform(1.0, 3.699))
L, max_ = make_data(length, density)
lengths[d].append(length)
for p,probe in enumerate(probes):
lin = timeit(partial(linear_probe, L, max_, probe), number=5) / 5
sor = timeit(partial(sorted_probe, L, max_, probe), number=5) / 5
lin_pts[d][p].append(lin / probe)
sor_pts[d][p].append(sor / probe)
# plot the results
plt.figure(figsize=(9.,6.))
plt.axis([0, 5000, 0, 0.004])
for d,density in enumerate(densities):
xs = lengths[d]
lcol = density_col_lin[d]
scol = density_col_sor[d]
for p,probe in enumerate(probes):
plt.plot(xs, lin_pts[d][p], "o", color=lcol, markersize=4.0)
plt.plot(xs, sor_pts[d][p], "o", color=scol, markersize=4.0)
plt.show()
if __name__ == "__main__":
main()
导致
x 轴是 L 中的项目数,y 轴是每个探针的摊销时间;绿点是 sorted_probe(),蓝点是 linear_probe()。
结论:
- 这两个函数的运行时间与长度呈显着线性关系
- 对于 L 的单次探测,预排序比迭代慢大约 4 倍
- 交叉点似乎是大约 5 个探针;比这更少,线性搜索更快,更多,预排序更快。