哈希表是一种非常简单、实用的数据结构。它的原理是通过一定的哈希函数把数据映射到存储位置,再把数据存放在该位置,使查找的时间复杂度近似于O(1),进而大大节省程序的运行时间。
哈希表的原理如图
原来的数据可以直接通过哈希函数存储起来,这样在搜索的时候,等于每一个数据都有了自己的特定查找号码,所以在查找时,可以通过哈希函数,一步直接找到(不考虑冲突)。所以时间复杂度,接近O(1)。
但是哈希最大的困扰就是哈希函数在映射数据时会出现冲突。例如,把16和32按对8取余的结果存放到下标为0~7的位置:16先存入,余数为0;32再存入时余数还是0,而这个位置已经被占用了。怎么解决这一问题呢?这里介绍两种方法。
(开放定址)线性探测法

32原本也应该放在余数为0的位置,但是余数为0的地方已经有数字了,就向后一位放到余数为1的位置。假设下一次放64,那么64就放到余数为2的地方,这样查找64时,只需要探测3次就能找到。这样的查找效率还是比单链表的O(n)要快很多。用代码来表示是这样的。
头文件
#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include <stdio.h>
/* Type of the keys stored in the hash table. */
typedef int Key;
typedef enum// per-slot state: records what a table position currently holds
{
EMPTY,// slot is empty
EXIST,// slot holds a key
DELETED// slot held a key that has been removed
}State;
typedef struct Element// one slot of the table: a key together with the slot's state
{
Key key;
State state;
}Element;
/* Pointer to a hash function; its result for (key, capacity) is used as the starting index into the slot array. */
typedef int(*HashFuncType)(Key key, int capacity);
/* Open-addressing hash table that resolves collisions by linear probing. */
typedef struct HashTable
{
Element * table;// slot array, allocated by HashInit
int size;// incremented on every successful HashInsert; compared against capacity as the load factor
int capacity;// number of slots in table
HashFuncType HashFunc;// maps a key to its starting slot index
}HashTable;
/* Look up key; returns the index of the slot holding it, or -1 if it is not found. */
int HashSearch(HashTable *pHT, Key key);
/* Release the slot array owned by the table. */
void HashDestroy(HashTable *pHT);
/* Allocate capaicity slots, mark them all EMPTY, and record the hash function. */
void HashInit(HashTable *pHT, int capaicity, HashFuncType HashFunc);
/* Division-remainder hash of key for a table with the given capacity. */
int mod(Key key, int capacity);
/* Rehash into a table of twice the capacity once size * 10 / capacity reaches 7. */
void ExpandIfRequired(HashTable *pHT);
/* Insert key; returns 0 once the key is stored, -1 if the key is already present. */
int HashInsert(HashTable *pHT, Key key);
/* Mark the slot holding key as DELETED; returns 0 on success, -1 if the key is not found. */
int HashRemove(HashTable *pHT, Key key);
在扩容的地方引入了一个负载因子的概念:负载因子 = 哈希表中元素个数 / 散列表的长度。负载因子一般取0.7到0.8之间(本文代码中取0.7),超过0.8会影响哈希表的效率。增大负载因子的数值可以减少哈希表所占的内存空间,反之,减小负载因子的数值可以提高搜索效率。
函数的.c文件
#define _CRT_SECURE_NO_WARNINGS 1
#include "Hash.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Initialise an open-addressing hash table.
 *
 * pHT       - table to initialise; any previous contents are overwritten.
 * capaicity - number of slots to allocate.
 * HashFunc  - function mapping a key to its starting slot index.
 *
 * The allocation is checked with assert(), so the check is compiled out
 * when NDEBUG is defined.
 */
void HashInit(HashTable *pHT, int capaicity, HashFuncType HashFunc)
{
    pHT->table = (Element *)malloc(sizeof(Element) * capaicity);
    assert(pHT->table);
    pHT->size = 0;
    pHT->capacity = capaicity;
    pHT->HashFunc = HashFunc;
    for (int i = 0; i < capaicity; i++)
    {
        /* Give key a defined value as well: probing code may compare keys,
         * and reading an uninitialised int is not reliable. */
        pHT->table[i].key = 0;
        pHT->table[i].state = EMPTY;
    }
}
/*
 * Release the slot array owned by the table.
 *
 * The fields are reset afterwards so that the table no longer holds a
 * dangling pointer and calling HashDestroy twice is harmless
 * (free(NULL) is a no-op). The table must be re-initialised with
 * HashInit before it is used again.
 */
void HashDestroy(HashTable *pHT)
{
    free(pHT->table);
    pHT->table = NULL;
    pHT->size = 0;
    pHT->capacity = 0;
}
/*
 * Look up key using linear probing.
 *
 * Returns the index of the slot that holds key, or -1 if key is not in
 * the table.
 *
 * Probing starts at the slot chosen by the hash function and stops at the
 * first EMPTY slot. DELETED slots are skipped so keys stored behind a
 * tombstone are still found. The number of probes is capped at capacity,
 * so a table without any EMPTY slot cannot cause an endless loop.
 */
int HashSearch(HashTable *pHT, Key key)
{
    int index = pHT->HashFunc(key, pHT->capacity);
    for (int probes = 0; probes < pHT->capacity; probes++)
    {
        const Element *slot = &pHT->table[index];
        if (slot->state == EMPTY)
        {
            break;
        }
        if (slot->state == EXIST && slot->key == key)
        {
            return index;
        }
        index = (index + 1) % pHT->capacity; /* wrap around to the first slot */
    }
    return -1;
}
/*
 * Division-remainder hash: maps key to an index in [0, capacity).
 *
 * In C the result of % takes the sign of the dividend, so a negative key
 * would yield a negative index; the remainder is shifted back into range
 * in that case. capacity must be greater than zero.
 */
int mod(Key key, int capacity)
{
    int remainder = key % capacity;
    if (remainder < 0)
    {
        remainder += capacity;
    }
    return remainder;
}
/*
 * Grow the table when the load factor reaches 0.7.
 *
 * A new table with twice the capacity is built and every EXIST key is
 * re-inserted, because a key's slot depends on the capacity. DELETED
 * slots are not carried over. size is then taken from the new table so
 * that it counts only the keys actually moved; otherwise removed keys
 * would keep counting toward the load factor and the table would keep
 * doubling even when it is nearly empty.
 */
void ExpandIfRequired(HashTable *pHT)
{
    /* integer form of size / capacity < 0.7 */
    if (pHT->size * 10 / pHT->capacity < 7)
    {
        return;
    }

    HashTable newHT;
    HashInit(&newHT, pHT->capacity * 2, pHT->HashFunc);
    for (int i = 0; i < pHT->capacity; i++)
    {
        if (pHT->table[i].state == EXIST)
        {
            HashInsert(&newHT, pHT->table[i].key);
        }
    }

    free(pHT->table);
    pHT->table = newHT.table;
    pHT->capacity = newHT.capacity;
    pHT->size = newHT.size;
}
/*
 * Insert key into the table.
 *
 * Returns 0 when the key was stored, -1 when the key is already present
 * (or, defensively, when no free slot could be found).
 *
 * The whole probe chain up to the first EMPTY slot is examined before
 * inserting. Stopping at the first DELETED slot would miss a copy of the
 * key stored further along the chain and create a duplicate. The first
 * reusable slot (DELETED or EMPTY) seen on the way is remembered and used
 * for the new key. Keys are only compared for EXIST slots.
 */
int HashInsert(HashTable *pHT, Key key)
{
    ExpandIfRequired(pHT); /* grow first so a free slot is available */

    int index = pHT->HashFunc(key, pHT->capacity);
    int target = -1; /* first reusable slot on the probe chain */

    for (int probes = 0; probes < pHT->capacity; probes++)
    {
        const Element *slot = &pHT->table[index];
        if (slot->state == EXIST)
        {
            if (slot->key == key)
            {
                return -1; /* duplicate */
            }
        }
        else
        {
            if (target < 0)
            {
                target = index;
            }
            if (slot->state == EMPTY)
            {
                break; /* end of the probe chain */
            }
        }
        index = (index + 1) % pHT->capacity;
    }

    if (target < 0)
    {
        return -1; /* every slot is occupied */
    }

    pHT->table[target].key = key;
    pHT->table[target].state = EXIST;
    pHT->size++;
    return 0;
}
/*
 * Remove key from the table.
 *
 * Returns 0 when the key was found and removed, -1 when it is not present.
 *
 * The slot is marked DELETED rather than EMPTY so that keys stored
 * further along the same probe chain stay reachable. size is left
 * unchanged, as before: the tombstone still occupies a slot, and size is
 * the value ExpandIfRequired compares against capacity. The number of
 * probes is capped at capacity so a table without any EMPTY slot cannot
 * cause an endless loop.
 */
int HashRemove(HashTable *pHT, Key key)
{
    int index = pHT->HashFunc(key, pHT->capacity);
    for (int probes = 0; probes < pHT->capacity; probes++)
    {
        Element *slot = &pHT->table[index];
        if (slot->state == EMPTY)
        {
            break;
        }
        if (slot->state == EXIST && slot->key == key)
        {
            slot->state = DELETED;
            return 0;
        }
        index = (index + 1) % pHT->capacity;
    }
    return -1;
}
哈希桶
哈希桶就是将数组和链表结合起来解决哈希函数的冲突问题。通俗地讲,就是数组里存的是链表的地址:先把16放进去,再把32放进去;查找32的时候余数为0,就从16开始往后继续找,直到找到32为止。画个图。
首先是头文件
#define _CRT_SECURE_NO_WARNINGS 1
/* Type of the keys stored in the hash buckets. */
typedef int Key;
/* Node of a singly linked bucket chain. */
typedef struct Node
{
Key key;// key stored in this node
struct Node * Next;// next node in the same bucket, or NULL at the end of the chain
}Node;
/* Hash table that resolves collisions by chaining: each array entry is the head of a linked list. */
typedef struct HashBucket
{
int size;// incremented on every successful HashBucketInsert
int capacity;// number of buckets in array
Node ** array;// bucket heads; NULL denotes an empty chain
}HashBucket;
/* Allocate capacity buckets and set each of them to an empty chain. */
void HashBucketInit(HashBucket *pHB, int capacity);
/* Free every chain and then the bucket array. */
void HashBucketDestroy(HashBucket *pHB);
/* Free every node of the list starting at first. */
void ListDestroy(Node *first);
/* Return the node holding key, or NULL if the key is not present. */
Node * HashBucketSearch(HashBucket *pHB, Key key);
/* Rebuild the table with twice as many buckets once size reaches capacity. */
void ExpandIfRequired1(HashBucket *pHB);
/* Insert key; returns 0 after inserting, -1 if the key is already present. */
int HashBucketInsert(HashBucket *pHB, Key key);
/* Unlink and free the node holding key, if there is one; returns an int status code. */
int HashBucketRemove(HashBucket *pHB, Key key);
然后是.c文件
#define _CRT_SECURE_NO_WARNINGS 1
#include "HashBucket.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Initialise a chained hash table with the given number of buckets.
 *
 * pHB      - table to initialise; any previous contents are overwritten.
 * capacity - number of buckets to allocate.
 *
 * The function cannot report failure to its caller, so an allocation
 * failure terminates the program instead of letting the initialisation
 * loop write through a NULL pointer.
 */
void HashBucketInit(HashBucket *pHB, int capacity)
{
    pHB->array = (Node **)malloc(sizeof(Node *) * capacity);
    /* malloc(0) may legitimately return NULL, so only treat NULL as an
     * error when buckets were actually requested */
    if (pHB->array == NULL && capacity > 0)
    {
        fprintf(stderr, "HashBucketInit: out of memory\n");
        abort();
    }
    for (int i = 0; i < capacity; i++)
    {
        pHB->array[i] = NULL; /* empty chain */
    }
    pHB->capacity = capacity;
    pHB->size = 0;
}
/*
 * Free every node of the singly linked list that starts at first.
 * Passing NULL (an empty list) does nothing.
 */
void ListDestroy(Node *first)
{
    Node *walker = first;
    while (walker != NULL)
    {
        Node *doomed = walker;
        walker = walker->Next; /* step forward before the node is released */
        free(doomed);
    }
}
/*
 * Release all memory owned by the table: every bucket chain first, then
 * the bucket array itself. The fields of *pHB are not modified.
 */
void HashBucketDestroy(HashBucket *pHB)
{
    int slot = 0;
    while (slot < pHB->capacity)
    {
        ListDestroy(pHB->array[slot]);
        slot++;
    }
    free(pHB->array);
}
/*
 * Look up key in the chained hash table.
 *
 * Returns the node that holds key, or NULL if the key is not present.
 *
 * The bucket is key % capacity. In C that remainder is negative for a
 * negative key, so it is shifted back into [0, capacity) before it is
 * used as an array index.
 */
Node * HashBucketSearch(HashBucket *pHB, Key key)
{
    int index = key % pHB->capacity;
    if (index < 0)
    {
        index += pHB->capacity;
    }
    for (Node *cur = pHB->array[index]; cur != NULL; cur = cur->Next)
    {
        if (cur->key == key)
        {
            return cur;
        }
    }
    return NULL;
}
/*
 * Double the number of buckets once size has reached capacity.
 *
 * A fresh table is built and every key is inserted into it, after which
 * the old chains and bucket array are destroyed and the new array and
 * capacity are adopted. size is left as it was.
 */
void ExpandIfRequired1(HashBucket *pHB)
{
    HashBucket grown;
    int slot;

    if (pHB->size >= pHB->capacity)
    {
        HashBucketInit(&grown, pHB->capacity * 2);

        slot = 0;
        while (slot < pHB->capacity)
        {
            Node *walker = pHB->array[slot];
            while (walker != NULL)
            {
                HashBucketInsert(&grown, walker->key);
                walker = walker->Next;
            }
            slot++;
        }

        HashBucketDestroy(pHB);
        pHB->capacity = grown.capacity;
        pHB->array = grown.array;
    }
}
/*
 * Insert key into the chained hash table.
 *
 * Returns 0 when the key was inserted, -1 when the key is already present
 * or the new node could not be allocated.
 *
 * The new node is pushed at the head of its bucket and linked to the
 * previous head, so keys already stored in that bucket are kept.
 */
int HashBucketInsert(HashBucket *pHB, Key key)
{
    ExpandIfRequired1(pHB);
    if (HashBucketSearch(pHB, key) != NULL)
    {
        return -1;
    }

    int index = key % pHB->capacity;
    if (index < 0)
    {
        index += pHB->capacity; /* % is negative for a negative key */
    }

    Node *node = (Node *)malloc(sizeof *node);
    if (node == NULL)
    {
        return -1;
    }
    node->key = key;
    node->Next = pHB->array[index]; /* keep the existing chain behind the new head */
    pHB->array[index] = node;
    pHB->size++;
    return 0;
}
/*
 * Remove key from the chained hash table.
 *
 * Returns 0 when the key was found and its node freed, -1 when the key
 * is not present.
 *
 * The function returns straight after freeing the node, so the freed
 * node is never touched again, and size is decremented to match the
 * increment done on insertion.
 */
int HashBucketRemove(HashBucket *pHB, Key key)
{
    int index = key % pHB->capacity;
    if (index < 0)
    {
        index += pHB->capacity; /* % is negative for a negative key */
    }

    /* link points at whichever pointer leads to the current node: the
     * bucket head at first, then the Next field of the previous node */
    Node **link = &pHB->array[index];
    while (*link != NULL)
    {
        Node *cur = *link;
        if (cur->key == key)
        {
            *link = cur->Next;
            free(cur);
            pHB->size--;
            return 0;
        }
        link = &cur->Next;
    }
    return -1;
}
哈希函数对字符串的处理。
第一步是把字节流转换成一个数字
第二步拿着数字然后再用除留余数法
/*
 * Polynomial string hash: turns a NUL-terminated byte string into a
 * number, which can then be reduced with the division-remainder method.
 *
 * str must not be NULL. The result has its top bit cleared by the final
 * mask (0x7FFFFFFF).
 *
 * Each byte is read as unsigned char. Whether plain char is signed is
 * implementation-defined, so without the cast bytes >= 0x80 would
 * contribute different values on different platforms.
 */
unsigned int RKDR(const char *str)
{
    /* multiplying by the seed before adding each byte makes the result
     * depend on the order of the characters */
    const unsigned int seed = 131;
    unsigned int hash = 0;
    for (const unsigned char *p = (const unsigned char *)str; *p != '\0'; p++)
    {
        hash = hash * seed + *p;
    }
    return hash & 0x7FFFFFFF;
}
这就是我对哈希函数的理解,希望各位大佬指正