如何优化 MAC 的搜索 [关闭]答案

【问题标题】：how to optimize search of MAC [closed]如何优化 MAC 的搜索 [关闭]
【发布时间】：2018-11-01 22:11:14
【问题描述】：

我必须加快在数组中搜索 MAC 地址（大小：32k）。我想从中获得更好的性能，我写了一个小示例代码来显示问题（请注意，数组中的 MAC 将是随机数（随机端口，随机 vlan）并且不是很好地排序（如显示在示例代码）。现在我正在寻找如何改进的建议，即加快速度：

#include <stdio.h>
#include <string.h>

#define MAX_MAC 32768
typedef unsigned char l2_mac_t[6];
typedef struct l2_s {
    int prt;
    int vln;
    l2_mac_t mac;
}l2_t;
int find_mac(int port, int vlan, l2_mac_t mac);
void fill_mac(void);

static l2_t arr[MAX_MAC] = {0};

int main (void) {
    int i = 0;
    int res = 0;
    fill_mac();
    for (i=0;i<MAX_MAC;i++) {
        res = find_mac(arr[i].prt,arr[i].vln,arr[i].mac);
        if (res%1000 == 0 )
            printf("Got MAC %d\n",res);
    }

}
int find_mac(int port, int vlan, l2_mac_t mac) {
    int i = 0;

    for (int i = 0;i< MAX_MAC; i++) {
        if (arr[i].prt == port) {
            if (arr[i].vln == vlan) {
                if (memcmp(arr[i].mac,mac,sizeof(l2_mac_t)) == 0 ) {
                //found
                    return i;
                }
            }
        }
    }
}

void fill_mac(void) {
    int i = 0;
    for (i=0;i<MAX_MAC; i++) {
        arr[i].prt = 4;
        arr[i].vln = 10;
        arr[i].mac[5] = i%255;
        arr[i].mac[4] = i%65025;

    }
}

下面是获取一些cmets后的一些编辑代码：

好的，

我打算使用哈希并想出了以下内容（这给了我一个段错误，因为它不想在init() 中分配这么多的内存）。另外，这感觉有点像用大锤敲它，肯定有比下面MacSum()更好的哈希方法，欢迎任何建议！

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define MAX_MAC 32768

#define MacSum(x)   (x)[0]*(2^24) \
                   +(x)[1]*(2^20) \
                   +(x)[2]*(2^16) \
                   +(x)[3]*(2^12)\
                   +(x)[4]*(2^8)\
                   +(x)[5]


typedef unsigned char l2_mac_t[6];
typedef struct l2_s {
    int prt;
    int vln;
    l2_mac_t mac;
}l2_t;

static unsigned short *L2Hash=0;

int find_mac(int port, int vlan, l2_mac_t mac);
void fill_mac(void);
void init(void);

static l2_t arr[MAX_MAC] = {0};

int main (void) {
    int i = 0;
    int res = 0;
    init();
    fill_mac();
    for (i=0;i<MAX_MAC;i++) {
        res = find_mac(arr[i].prt,arr[i].vln,arr[i].mac);
        /*if (res%1000 == 0 )
            printf("Got MAC %d\n",res);*/
    }

}
int find_mac(int port, int vlan, l2_mac_t mac) {
    int i = 0;
    int key = 0;

    key = MacSum(mac);
    if (memcmp(arr[key].mac,mac,sizeof(l2_mac_t)) == 0 ) {
        return key;
    } else {
        for (int i = 0;i< MAX_MAC; i++) {
            if (arr[i].prt == port) {
                if (arr[i].vln == vlan) {
                    if (memcmp(arr[i].mac,mac,sizeof(l2_mac_t)) == 0 ) {
                        return i;
                    }
                }
            }
        }
    }
}

void fill_mac(void) {
    int i = 0;
    int key = 0;
    for (i=0;i<MAX_MAC; i++) {
        arr[i].prt = 4;
        arr[i].vln = 10;
        arr[i].mac[5] = i%255;
        arr[i].mac[4] = i%65025;
        key = MacSum(arr[i].mac);
        L2Hash[key] = i;
    }
}

void init(void) {
    static int init = 0;
    if (init)
        return;
    L2Hash = (unsigned short*) malloc(0xffffffffffff*sizeof(unsigned short));
}

如需进一步更新问题，请向下滚动至第二个答案

【问题讨论】：

保持数组排序并使用二分搜索
只要数据结构是一个随机排列的数组，在找到匹配之前，你不能比线性搜索更好。如果您可以将数据结构更改为其他内容，那么您可以做得更好。
哈希图、跳过列表、二叉搜索树、sqlite 数据库等。可能性无穷无尽，取决于您需要和可以使用的内容（例如，内存、磁盘空间是否有限制，使用具有某些许可证的外部库）。但即使只是为了保持数组排序，也要考虑插入与查找相比的频率。对于单个查找，您可能必须遍历整个未排序的数组（实际上，如果没有匹配项，您总是必须这样做），而二进制搜索只需要一些比较，并且插入是二进制搜索 + 批量复制 + 可能重新分配。
好吧，对最大插槽一半的单个 memmove 进行基准测试，看看需要多少次遍历数组来抵消该成本，然后考虑插入频率 (memmove) 与频率查找次数以及查找没有结果的常见情况（= 最坏情况）。
另外，您的查询是随机的还是它们是否有关于先前查询的模式（例如，如果可能再次查询相同的条目，您可以缓存或移动它）或插入（例如，如果更有可能查询最新/最旧条目，请相应地更改搜索方向）。但也可以考虑实现一个简单的哈希映射。

标签： c arrays search

【解决方案1】：

风格说明：嵌套的if()s 很难阅读。有些人更喜欢：

int find_mac(int port, int vlan, l2_mac_t mac) {
    int i = 0;

    for (int i = 0;i< MAX_MAC; i++) {
        if (arr[i].prt != port) continue;
        if (arr[i].vln != vlan) continue;
        if (memcmp(arr[i].mac,mac,sizeof(l2_mac_t)) continue;
        //found
        return i;
    }
return WHAT; //!!11!!1
}

[这应该是注释，但我需要格式。]

【讨论】：

像这样的保护条件确实简化了你的循环。

【解决方案2】：

我遵循了上述一些建议，并提出了以下代码。我现在已将 MAC 的数量减少到 1000，但我已经收到了一些：Could not find MAC 消息。有人可以在这里帮助我吗？代码：

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define MAX_MAC     1000
#define SHORT_INIT  0xFFFF

#define MacSum(x)   (x)[0]*(2^24) \
                   +(x)[1]*(2^20) \
                   +(x)[2]*(2^16) \
                   +(x)[3]*(2^12)\
                   +(x)[4]*(2^8)\
                   +(x)[5]

typedef unsigned char l2_mac_t[6];
typedef struct l2_s {
    int prt;
    int vln;
    l2_mac_t mac;
}l2_t;

static unsigned short l2hash[MAX_MAC]={0};

int find_mac(int port, int vlan, l2_mac_t mac);
void fill_mac_tab(void);
void init(void);
void mac_hash_add (int idx, l2_mac_t mac);

static l2_t arr[MAX_MAC] = {0};
//---------------------------------------------------------------------

int main (void) {
    int i = 0;
    int res = 0;
    init();
    fill_mac_tab();
    for (i=0;i<MAX_MAC;i++) {
        res = find_mac(arr[i].prt,arr[i].vln,arr[i].mac);
    }
}
//---------------------------------------------------------------------

void init(void) {
    int i = 0;
    for (i=0;i<MAX_MAC;i++)
        l2hash[i] = SHORT_INIT;
}
//---------------------------------------------------------------------

int find_mac(int port, int vlan, l2_mac_t mac) {
    int i = 0;
    int k = 0;

    k = (MacSum(mac))%MAX_MAC;
    if (memcmp(arr[k].mac,mac,sizeof(l2_mac_t)) == 0 ) {
        printf("Found MAC %02X:%02X:%02X:%02X:%02X:%02X at key %d\n",mac[0],mac[1],mac[2],mac[3],mac[4],mac[5],k);
        return k;
    } else {
        for (int i = k;i< MAX_MAC; i++ ) {
            if (arr[i].prt != port ) continue;
            if (arr[i].vln != vlan ) continue;
            if (memcmp( arr[i].mac,mac,sizeof(l2_mac_t) )) continue;
            printf("Found MAC %02X:%02X:%02X:%02X:%02X:%02X\n",mac[0],mac[1],mac[2],mac[3],mac[4],mac[5]);
            return i;
        }
    }
    printf("Could not find MAC %02X:%02X:%02X:%02X:%02X:%02X\n",mac[0],mac[1],mac[2],mac[3],mac[4],mac[5]);
    return -1;
}
//---------------------------------------------------------------------

void fill_mac_tab(void) {
    int i = 0;
    int o = 0;
    int key = 0;    
    for (i=0;i<MAX_MAC; i++) {
        // fill table
        arr[i].prt = 4;
        arr[i].vln = 10;       
        arr[i].mac[5] = i%255;
        if (i>255)
            arr[i].mac[4] = i%65025;
        mac_hash_add(i,arr[i].mac);
    }
}

void mac_hash_add (int idx, l2_mac_t mac) {
    int i = 0;
    int o = 0;
    int k = 0;
    k = (MacSum(arr[idx].mac))%MAX_MAC;
    printf("k %d\n",k);
    if(l2hash[k] == SHORT_INIT ) {
        l2hash[k] = i;
    } else {
        printf("k %d already used, find next\n",k);
        // find next empty spot in hash 
        for (o=k; o<MAX_MAC; o++) {
            if (l2hash[o] != SHORT_INIT ) continue;
            printf("using %d\n",o);
            l2hash[o] = i;
            return;
        }
        printf("unable to find empty key within range \n");
    }
}

上述的可行解决方案如下所示： 它仍然需要大量时间，但它比每次都线性搜索每个 MAC 的完整数组要好得多。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define MAX_MAC     32768
#define SHORT_INIT  0xFFFF
#define OK          0
#define ERROR       -1

#define MacSum(x)   (x)[0]*(2^24) \
                   +(x)[1]*(2^20) \
                   +(x)[2]*(2^16) \
                   +(x)[3]*(2^12)\
                   +(x)[4]*(2^8)\
                   +(x)[5]

typedef unsigned char l2_mac_t[6];
typedef struct l2_s {
    int prt;
    int vln;
    l2_mac_t mac;
}l2_t;

static unsigned short l2hash[MAX_MAC]={0};

int find_mac(int port, int vlan, l2_mac_t mac);
int fill_mac_tab(void);
void init(void);
int mac_hash_add (int idx, l2_mac_t mac);

static l2_t arr[MAX_MAC] = {0};
//---------------------------------------------------------------------

int main (void) {
    int i = 0;
    int rv = OK;
    init();
    printf("insert\n");
    rv = fill_mac_tab();
    if (rv) {
        printf("ERROR: fill_mac_tab() returned %d\n",rv);
        exit (rv);
    }
    printf("find\n");
    for (i=0;i<MAX_MAC;i++) {
        rv = find_mac(arr[i].prt,arr[i].vln,arr[i].mac);
        if (rv <0) {
            printf("ERROR: find_mac() returned %d\n",rv);
            exit(rv);
        }
    }
}
//---------------------------------------------------------------------

void init(void) {
    int i = 0;
    for (i=0;i<MAX_MAC;i++)
        l2hash[i] = SHORT_INIT;
}
//---------------------------------------------------------------------

int find_mac(int port, int vlan, l2_mac_t mac) {
    int i = 0;
    int k = 0;

    k = (MacSum(mac))%MAX_MAC;
    if (memcmp(arr[k].mac,mac,sizeof(l2_mac_t)) == 0 ) {
        //printf("Found MAC %02X:%02X:%02X:%02X:%02X:%02X at key %d\n",mac[0],mac[1],mac[2],mac[3],mac[4],mac[5],k);
        return k;
    } else {
        for (int i = k;i< MAX_MAC; i++ ) {
            if (arr[i].prt != port ) continue;
            if (arr[i].vln != vlan ) continue;
            if (memcmp( arr[i].mac,mac,sizeof(l2_mac_t) )) continue;
            //printf("Found MAC %02X:%02X:%02X:%02X:%02X:%02X\n",mac[0],mac[1],mac[2],mac[3],mac[4],mac[5]);
            return i;
        }
        //printf("continue search from bottom\n");
        for (int i = 0;i< k; i++ ) {
            if (arr[i].prt != port ) continue;
            if (arr[i].vln != vlan ) continue;
            if (memcmp( arr[i].mac,mac,sizeof(l2_mac_t) )) continue;
            //printf("Found MAC %02X:%02X:%02X:%02X:%02X:%02X\n",mac[0],mac[1],mac[2],mac[3],mac[4],mac[5]);
            return i;
        }
    }
    printf("Could not find MAC %02X:%02X:%02X:%02X:%02X:%02X\n",mac[0],mac[1],mac[2],mac[3],mac[4],mac[5]);
    return ERROR;
}
//---------------------------------------------------------------------

int fill_mac_tab(void) {
    int i = 0;
    int o = 0;
    int key = 0;    
    int rv = OK;
    for (i=0;i<MAX_MAC; i++) {
        // fill table
        arr[i].prt = 4;
        arr[i].vln = 10;       
        arr[i].mac[5] = i%255;
        if (i>255)
            arr[i].mac[4] = i%65025;
        rv = mac_hash_add(i,arr[i].mac);
    }
    return rv;
}

int mac_hash_add (int idx, l2_mac_t mac) {
    int i = 0;
    int o = 0;
    int k = 0;
    int rv = OK;
    k = (MacSum(arr[idx].mac))%MAX_MAC;
    //printf("k %d\n",k);
    if(l2hash[k] == SHORT_INIT ) {
        l2hash[k] = i;
    } else {
        //printf("k %d already used, find next\n",k);
        // find next empty spot in hash 
        for (o=k; o<MAX_MAC; o++) {
            if (l2hash[o] != SHORT_INIT ) continue;
            //printf("using %d\n",o);
            l2hash[o] = i;
            return OK;
        }
        //printf("Continue search in bottom half\n");
        for (o=0; o<k; o++) {
            if (l2hash[o] != SHORT_INIT ) continue;
            //printf("using %d\n",o);
            l2hash[o] = i;
            return OK;
        }
        //printf("unable to find empty key within range \n");
        rv = ERROR;
    }
    return rv;
}

【讨论】：

看起来您正在使用哈希来索引“arr”，而无需通过“l2hash”。
@Arkku arr 表示存储 MAC 的区域，l2hash 是散列值，应该提供每个 MAC 到 arr 的快速查找以找到相应的条目。我在find_mac() 中使用kk 尝试直接查找（memcmp() for verification），如果不正确，我会查看连续的条目，直到找到正确的条目。一旦达到顶部MAX_MAC，我必须从下往上执行搜索...
@Arkku 是的，有一些好处，它会首先尝试将条目存储在k，如果它没有成功（因为它已经被占用），它将增加并找到下一个条目- 搜索的可能性比每次通过MAX_MAC 线性搜索要快得多，这一点非常重要！
但是你没有使用这个好处，因为你的find_mac 坏了，这是我最初的评论试图说的。 =)
在 C 语言中有十亿加一个二叉树实现的示例，因此您必须四处寻找您喜欢的一个并将其用作应用程序的基础。我链接到的 Wikipedia 页面讨论了基础知识，并且如果您想尝试这些形式，还可以链接到更奇特的形式。大多数数据库索引使用某种形式的二叉树对数据进行排序，因为它们简单、快速且高效。