您可以尝试一种忽略重复项的归并(merge)操作。对于已排序的数组,这是 O(n) 操作。如果两个长度为 100 的数组有 70% 的共同元素,则合并后的集合将具有 130 个或更少的唯一整数。在您的情况下,您并不需要合并结果本身,因此只需统计唯一条目的数量,并在计数达到 131 或到达两个数组的末尾时立即停止。
编辑 (2):以下代码使用 4 个内核,可以在大约 47 秒内完成约 1760 亿次比较。使用 4 个内核对代码进行多线程处理后,速度仅提高了 70%。
仅当 int 值的范围非常小时,使用 BitSet 才有效。否则你必须比较 int[] (如果你需要的话,我已经把代码留在里面了)
在 47.712 秒内进行了 176,467,034,428 次比较,找到了 444,888 个匹配项
/**
 * Benchmarks all-pairs comparison of generated int arrays using BitSets.
 *
 * <p>Generates 50,000 sorted arrays of 100 values, converts each to a {@link BitSet},
 * then compares every unordered pair on a fixed thread pool sized to the number of
 * available processors. A pair counts as a match when the size of the union of the
 * two sets does not exceed the threshold derived from the array length.
 *
 * <p>Note that the reported "comparisons" figure is the sum of the values returned by
 * {@code compare} (the union sizes), not the number of calls made to it.
 *
 * @param args ignored
 * @throws InterruptedException if interrupted while waiting for the pool to finish
 */
public static void main(String... args) throws InterruptedException {
    final int length = 100;
    // Two arrays sharing at least 70% of their elements have a union of at most
    // 2 * length - 70% of length distinct values (130 when length is 100).
    final int maxUnionForMatch = 2 * length - length * 70 / 100;

    int[][] ints = generateArrays(50000, length);
    final BitSet[] bitSets = new BitSet[ints.length];
    for (int i = 0; i < ints.length; i++) {
        int[] ia = ints[i];
        // The arrays come back sorted, so the last element is the largest value.
        // The constructor takes a size in bits, hence +1 to hold that index without growing.
        BitSet bs = new BitSet(ia[ia.length - 1] + 1);
        for (int value : ia) {
            bs.set(value);
        }
        bitSets[i] = bs;
    }

    final AtomicInteger matches = new AtomicInteger();
    final AtomicLong comparisons = new AtomicLong();
    int nThreads = Runtime.getRuntime().availableProcessors();
    ExecutorService executorService = Executors.newFixedThreadPool(nThreads);
    long start = System.nanoTime();
    for (int i = 0; i < bitSets.length - 1; i++) {
        final int finalI = i;
        executorService.submit(new Runnable() {
            @Override
            public void run() {
                // Accumulate locally and publish once per task so the worker threads
                // do not contend on the shared atomics for every single pair.
                int localMatches = 0;
                long localComparisons = 0;
                for (int j = finalI + 1; j < bitSets.length; j++) {
                    int unionSize = compare(bitSets[finalI], bitSets[j]);
                    if (unionSize <= maxUnionForMatch) {
                        localMatches++;
                    }
                    localComparisons += unionSize;
                }
                matches.addAndGet(localMatches);
                comparisons.addAndGet(localComparisons);
            }
        });
    }
    executorService.shutdown();
    if (!executorService.awaitTermination(1, TimeUnit.HOURS)) {
        // Do not report partial totals as if the run had completed.
        executorService.shutdownNow();
        System.err.println("Timed out waiting for the comparisons to finish; the totals below are partial.");
    }
    long time = System.nanoTime() - start;
    System.out.printf("Peformed %,d comparisons in %.3f seconds and found %,d matches %n",comparisons.longValue(),time/1e9, matches.intValue());
}
/**
 * Builds {@code count} sorted int arrays of {@code length} distinct values each.
 *
 * <p>Values are taken from the pool 0..169: the pool is reshuffled for every row with a
 * generator seeded with 1 (so output is repeatable), the first {@code length} entries
 * are copied out, and the row is sorted ascending.
 *
 * @param count  number of arrays to produce
 * @param length number of values per array; must not exceed the pool size of 170
 * @return the generated arrays
 */
private static int[][] generateArrays(int count, int length) {
    final int poolSize = 170;
    List<Integer> pool = new ArrayList<Integer>(poolSize);
    for (int value = 0; value < poolSize; value++) {
        pool.add(value);
    }

    int[][] result = new int[count][length];
    Random random = new Random(1);
    for (int row = 0; row < result.length; row++) {
        // The pool is shuffled in place and carried over from row to row.
        Collections.shuffle(pool, random);
        int[] target = result[row];
        for (int k = 0; k < target.length; k++) {
            target[k] = pool.get(k);
        }
        Arrays.sort(target);
    }
    return result;
}
/**
 * Returns the combined length of the two arrays minus the number of values found in both.
 *
 * <p>Walks both arrays in lock step the way a merge does, so both are expected to be
 * sorted ascending. For arrays without internal duplicates the result is the size of
 * their union.
 *
 * @param ia first sorted array
 * @param ja second sorted array
 * @return {@code ia.length + ja.length} minus the count of paired-up equal values
 */
private static int compare(int[] ia, int[] ja) {
    int shared = 0;
    int left = 0;
    int right = 0;
    while (left < ia.length && right < ja.length) {
        final int lv = ia[left];
        final int rv = ja[right];
        if (lv == rv) {
            shared++; // present in both arrays
        }
        // Advance whichever side holds the smaller value; both sides when equal.
        if (lv <= rv) {
            left++;
        }
        if (lv >= rv) {
            right++;
        }
    }
    return ia.length + ja.length - shared;
}
/**
 * Returns the number of bits set in at least one of the two bit sets.
 *
 * <p>The union is built in a scratch copy, so neither argument is modified.
 *
 * @param ia first bit set
 * @param ja second bit set
 * @return cardinality of the union of {@code ia} and {@code ja}
 */
private static int compare(BitSet ia, BitSet ja) {
    BitSet union = (BitSet) ia.clone();
    union.or(ja);
    return union.cardinality();
}