@Marichyasana 的回答是一个非常好的解决方案。我在回答您的问题时有点做过头了,顺便测试了多个进程池大小。代码大约有 114 行,您可以以此为基础进行修改。
我在一个包含 100,000 个样本的列表上测试了多个 Pool() 大小。我不认为我的代码非常“Pythonic”,欢迎任何反馈。我参考了这个示例(example)。
结果:
进程数:3;处理时间为:00:00:01
结果列表长度:1000
进程数:10;处理时间为:00:00:02
结果列表长度:1000
进程数:20;处理时间为:00:00:09
结果列表长度:1000
进程数:33;处理时间为:00:00:04
结果列表长度:1000
代码:
# from multiprocessing import Queue
from multiprocessing import Pool
from random import randint
from Timer import Timer # custom timer wraper I wrote
def create_list_indexes(m_int_list_len, int_seg_len=100):
    """
    Split a list of ``m_int_list_len`` samples into consecutive segments.

    Every segment is a half-open range ``[low, high)`` so that it can be
    used directly as a slice (``data[low:high]``).  Together the segments
    cover every index exactly once.  Each segment holds ``int_seg_len``
    samples, except possibly the last one, which holds the remainder.

    m_int_list_len
        type: int
        desc: length of the list of data; number of samples of data
    int_seg_len
        type: int
        desc: maximum number of samples per segment; defaults to 100
    returns
        type: list
        desc: list of lists; indexes for data list
            list_return[x][0] -> type: int; low index (inclusive)
            list_return[x][1] -> type: int; high index (exclusive)
    raises
        ValueError: if int_seg_len is not a positive integer
    """
    if int_seg_len <= 0:
        raise ValueError('int_seg_len must be a positive integer')
    # Stepping by the segment length yields each low index; clamping the
    # high index with min() handles the shorter trailing segment without a
    # special case.  A length of zero (or less) yields no segments.
    return [
        [int_low, min(int_low + int_seg_len, m_int_list_len)]
        for int_low in range(0, m_int_list_len, int_seg_len)
    ]
def test_pools(m_tuple_args):
"""
this method tests the different number of pools on a large list of data
m_list_tasks
type: list
desc: list of tuples
m_tuple_args[0] -> type: int; target to search for
m_tuple_args[1] -> type: list; list of indexes
m_tuple_args[1][0] -> type: int; low index
m_tuple_args[1][1] -> type: int; high index
m_tuple_args[2] -> type: list; the data
returns
type: list
desc: list of lists; samples which are lists of length 2
"""
# unpack tuple for simplicity
int_target = m_tuple_args[0]
int_low, int_high = m_tuple_args[1]
list_data = m_tuple_args[2]
list_results = list()
for list_sample in list_data[int_low:int_high]:
if list_sample == int_target:
list_results.append(list_sample)
# return results
return list_results
if __name__ == '__main__':
    # Benchmark: time the same search workload on pools of several sizes.
    list_proc = [3, 10, 20, 33]  # pool sizes to compare
    int_num_data = 100000
    int_max_int = 100
    int_target = 42

    # create random data list; each sample is a pair of ints in
    # [0, int_max_int]
    list_data = [[randint(0, int_max_int), randint(0, int_max_int)]
                 for _ in range(int_num_data)]

    # create indexes for list
    list_indexes = create_list_indexes(int_num_data)

    # The task list does not depend on the pool size, so build it once.
    # NOTE: every task carries the full data list, so it is pickled once
    # per task; that serialization cost is part of what gets measured.
    list_tasks = [(int_target, list_index, list_data)
                  for list_index in list_indexes]

    # compare pools
    for int_num_proc in list_proc:
        # Create one pool at a time so idle workers from the other pool
        # sizes do not compete for CPU/memory during the measurement; the
        # context manager shuts the workers down when the block exits.
        with Pool(processes=int_num_proc) as pool:
            # Start timing after the pool exists, so that worker start-up
            # is not part of the measurement.
            timer_pool = Timer()
            list_pool = pool.map(test_pools, list_tasks)
            string_pool = 'number of processes: ' + str(int_num_proc)
            timer_pool.stop_timer(string_pool)
        print('length of results list', len(list_pool))
        print()