当我把所有算法都改为处理 NumPy 二维数组后,我发现我的算法是最快的。当然,性能也取决于二维数组的维度,而我的实际数据是 380x900。我认为这主要得益于 NumPy 的数组运算。以下是代码:
import numpy as np
import time
import sortedcontainers
def John(x): #x is 1D array
    """Return the running rank of every element of a 1D NumPy array.

    For position ``i`` the rank is the number of elements in ``x[:i+1]``
    that are less than or equal to ``x[i]``; the element itself is always
    counted, so ranks start at 1 and ties take the higher rank.
    """
    counts = [np.sum(x[pos] >= x[:pos + 1]) for pos in range(len(x))]
    return np.array(counts)
def John_2D(rv): #rv is 2d numpy array. rank it along axis 1!
    """Return the running rank of every element along axis 1 of a 2D array.

    Column ``i`` of the result holds, for each row, the number of entries in
    ``rv[row, :i+1]`` that are less than or equal to ``rv[row, i]``. Each
    column is handled with one vectorised comparison across all rows.
    """
    # Unpacking the shape also rejects inputs that are not 2D.
    _n_rows, n_cols = rv.shape
    per_column = [
        np.sum(rv[:, :stop] <= rv[:, stop - 1:stop], axis=1)
        for stop in range(1, n_cols + 1)
    ]
    # One entry per column was collected; transpose back to (rows, columns).
    return np.array(per_column).T
def Matvei(x): #x is 1D array
    """Return the running rank of every element as a plain Python list.

    For each element the prefix ending at that element is sorted, and the
    rank is the 1-based position of the first occurrence of the element in
    the sorted prefix (so ties take the lower rank).
    """
    ranks = []
    for stop, value in enumerate(x, start=1):
        ordered_prefix = list(x[:stop])
        ordered_prefix.sort()
        ranks.append(ordered_prefix.index(value) + 1)
    return ranks
def Divarkar1(x):#x is 1D array
    """Return the running rank of every element using one pairwise comparison.

    Builds the full (n-1, n) table of ``x[i] >= x[j]`` comparisons, masks it
    down to the entries with ``j <= i`` and counts the surviving entries per
    row. The first element always has rank 1.
    """
    size = len(x)
    ranks = np.ones(size, dtype=int)
    # Row r corresponds to element r + 1 compared against every element.
    not_smaller = x[1:, None] >= x
    # Row r keeps columns 0..r+1, i.e. the prefix ending at element r + 1.
    in_prefix = np.tri(size - 1, size, k=1, dtype=bool)
    ranks[1:] = np.logical_and(not_smaller, in_prefix).sum(axis=1)
    return ranks
def Divarkar2(x):#x is 1D array
    """Return the running rank of every element using an einsum row reduction.

    Same pairwise-comparison approach as the ``&``/``sum`` variant, but the
    masked row counts are computed as a row-wise dot product of the
    comparison table (cast to float32) with the prefix mask.

    The previous version also evaluated an equivalent batched matmul and
    threw the result away; that dead computation only added to the measured
    run time and has been removed.
    """
    n = len(x)
    rr = np.ones(n, dtype=int)
    # Row r corresponds to element r + 1 compared against every element.
    m1 = x[1:, None] >= x
    # Row r keeps columns 0..r+1, i.e. the prefix ending at element r + 1.
    m2 = np.tri(n - 1, n, k=1, dtype=bool)
    # Row-wise sum of products == number of prefix entries <= the element.
    rr[1:] = np.einsum('ij,ij->i', m1.astype(np.float32), m2)
    return rr
def Monica1(nums): #nums is 1D array
    """Return the running rank of every element using a sorted container.

    Each value is looked up with ``bisect_left`` in a
    ``sortedcontainers.SortedList`` holding the values seen so far, then
    added to it. The lookup position is shifted by one so ranks start at 1.
    """
    seen = sortedcontainers.SortedList()

    def position_then_insert(value):
        # The position must be taken before the value itself is inserted.
        position = seen.bisect_left(value)
        seen.add(value)
        return position

    return np.array([position_then_insert(value) for value in nums]) + 1
def Monica2(nums): #nums is 1D array
    """Return the running rank of every element using the augmented merge sort.

    ``_augmented_mergesort`` reports ranks in sorted order together with the
    original position of each value; this scatters the ranks back to the
    original order and shifts them by one so they start at 1.
    """
    _sorted_values, positions, zero_based = _augmented_mergesort(nums)
    in_original_order = [None] * len(nums)
    for position, rank in zip(positions, zero_based):
        in_original_order[position] = rank
    return np.array(in_original_order) + 1
def _augmented_mergesort(nums): #nums is 1D array
# returns sorted nums, indexes of sorted nums in original nums, and corresponding ranks
if len(nums) == 1:
return nums, [0], [0]
left, right = nums[:len(nums)//2], nums[len(nums)//2:] #split the array by half
return _merge(*_augmented_mergesort(left), *_augmented_mergesort(right))
def _merge(lnums, lindexes, lranks, rnums, rindexes, rranks):
nums, indexes, ranks = [], [], []
i_left = i_right = 0
def add_from_left():
nonlocal i_left
nums.append(lnums[i_left])
indexes.append(lindexes[i_left])
ranks.append(lranks[i_left])
i_left += 1
def add_from_right():
nonlocal i_right
nums.append(rnums[i_right])
indexes.append(rindexes[i_right] + len(lnums))
ranks.append(rranks[i_right] + i_left)
i_right += 1
while i_left < len(lnums) and i_right < len(rnums):
if lnums[i_left] < rnums[i_right]:
add_from_left()
elif lnums[i_left] > rnums[i_right]:
add_from_right()
else:
raise ValueError("Tie detected")
if i_left < len(lnums):
while i_left < len(lnums):
add_from_left()
#nums += lnums[i_left:]
#indexes += lindexes[i_left:]
#ranks += lranks[i_left:]
else:
while i_right < len(rnums):
add_from_right()
return nums, indexes, ranks
def rank_2D(f,nums): #f is method, nums is 2D numpy array
    """Apply the 1D function ``f`` to each row of ``nums`` and stack the results.

    Rows are taken by iterating over ``nums``; the per-row results are
    combined into a single NumPy array.
    """
    return np.array([f(row) for row in nums])
# Benchmark 1: every 1D method on a single random vector of 6000 values.
# time.perf_counter() is used for the measurements because it is a
# monotonic, high-resolution clock; time.time() is a wall clock that can be
# adjusted while the benchmark runs.
x=np.random.rand(6000)
for f in [John, Matvei, Divarkar1, Divarkar2, Monica1, Monica2]:
    t1=time.perf_counter()
    rr=f(x)
    t2=time.perf_counter()
    print(f'{f.__name__+"_1D: ":16} {(t2-t1):.3f}')
print()

# Benchmark 2: a random 380x900 matrix ranked along axis 1. John_2D works on
# the whole matrix at once; the other methods are applied row by row.
x=np.random.rand(380,900)
t1=time.perf_counter()
rr=John_2D(x)
t2=time.perf_counter()
print(f'{"John_2D:":16} {(t2-t1):.3f}')
#print(rr)
for f in [Matvei, Divarkar1, Divarkar2, Monica1, Monica2]:
    t1=time.perf_counter()
    rr=rank_2D(f,x)
    t2=time.perf_counter()
    print(f'{f.__name__+"_2D: ":16} {(t2-t1):.3f}')
    #print(rr)
典型的结果是:
John_1D: 0.069
Matvei_1D: 7.208
Divarkar1_1D: 0.163
Divarkar2_1D: 0.488
Monica1_1D: 0.032
Monica2_1D: 0.082
John_2D: 0.409
Matvei_2D: 49.044
Divarkar1_2D: 1.276
Divarkar2_2D: 4.065
Monica1_2D: 1.090
Monica2_2D: 3.571
对于一维数组,Monica1 方法最快,但我的 NumPy 版本方法也不错。
对于二维数组,我的 NumPy 版本方法最快。
欢迎测试和评论。
谢谢
约翰