【发布时间】:2021-12-19 07:24:49
【问题描述】:
我需要高度优化的 CRC8 算法。我的目标是开发 CRC16 / CRC32 中已知的 Slice-by-4 解决方案。我想让代码尽可能接近我在下面发布的用于 CRC16 的解决方案。
生成 CRC 查找表的函数:
void crcspeed16_genTable(crcfn16 crcfn, uint16_t table[8][256]) {
uint16_t crc;
/* generate CRCs for all single byte sequences */
for (int n = 0; n < 256; n++) {
table[0][n] = crcfn(0, &n, 1);
}
/* generate nested CRC table for future slice-by-8 lookup */
for (int n = 0; n < 256; n++) {
crc = table[0][n];
for (int k = 1; k < 8; k++) {
crc = table[0][(crc >> 8) & 0xff] ^ (crc << 8);
table[k][n] = crc;
}
}
}
uint16_t crc16(uint16_t crc, const void *in_data, uint64_t len) {
const uint8_t *data = (const uint8_t*) in_data;
for (uint64_t i = 0; i < len; i++) {
crc = crc ^ (data[i] << 8);
for (int j = 0; j < 8; j++) {
if (crc & 0x8000) {
crc = (crc << 1) ^ CRC16_POLYNOMINAL;
} else {
crc = (crc << 1);
}
}
}
return crc;
}
调用生成表格:
crcspeed16_genTable(crc16, crc16_LUT);
基于 Slice-by-4 方案生成 CRC16 的函数:
uint16_t crc16_slice4(const void *buf, size_t len, uint16_t initialValue, uint16_t XOR_OUT) {
uint16_t crc = initialValue;
unsigned char *next = (unsigned char *)buf;
// process individual bytes until we reach an 8-byte aligned pointer
while (len && ((uintptr_t)next & 7) != 0) {
crc = crc16_LUT[0][((crc >> 8) ^ *next++) & 0xff] ^ (crc << 8);
len--;
}
// fast middle processing, 4 bytes (aligned!) per loop */
while (len >= 4) {
uint32_t n = *(uint32_t *)next;
crc = crc16_LUT[3][(n & 0xff) ^ ((crc >> 8) & 0xff)] ^
crc16_LUT[2][((n >> 8) & 0xff) ^ (crc & 0xff)] ^
crc16_LUT[1][(n >> 16) & 0xff] ^
crc16_LUT[0][n >> 24];
next += 4;
len -= 4;
}
// process remaining bytes (can't be larger than 8)
while (len) {
crc = crc16_LUT[0][((crc >> 8) ^ *next++) & 0xff] ^ (crc << 8);
len--;
}
return crc ^ XOR_OUT;
}
我的目标是调整算法以适用于 CRC8 和 CRC4。到目前为止,我所做的是将 LUT-Generator 更改为生成 LUT 的有效第一行并根据此 LUT 数据处理有效的 CRC。我未能调整中间部分来计算和利用 CRC 表的全部潜力。
为 CRC8 改编的功能(不完全功能): 表格生成:
void crcspeed8_genTable(crcfn8 crcfn, uint8_t table[8][256]) {
uint16_t crc;
/* generate CRCs for all single byte sequences */
for (int n = 0; n < 256; n++) {
table[0][n] = crcfn(0, &n, 1);
}
/* generate nested CRC table for future slice-by-8 lookup */
for (int n = 0; n < 256; n++) {
crc = table[0][n];
for (int k = 1; k < 8; k++) {
//crc = table[0][crc] ^ crc;
crc = table[0][(crc >> 4) & 0x0f] ^ (crc << 4);
table[k][n] = crc;
}
}
}
uint8_t crc8(uint8_t crc, const void *in_data, uint64_t len) {
const uint8_t *data = (const uint8_t*) in_data;
for (uint64_t i = 0; i < len; i++) {
//crc = crc ^ (data[i] << 8);
crc = crc ^ data[i];
for (int j = 0; j < 8; j++) {
if (crc & 0x80) {
crc = (crc << 1) ^ CRC8_POLYNOMINAL;
} else {
crc = (crc << 1);
}
}
}
return crc;
}
CRC 计算:
uint8_t crc8_slice4(const void *buf, size_t len, uint8_t initialValue, uint8_t XOR_OUT) {
uint8_t crc = initialValue;
unsigned char *next = (unsigned char *)buf;
// process individual bytes until we reach an 8-byte aligned pointer
while (len && ((uintptr_t)next & 7) != 0) {
printf("\nAlign processing");
crc = crc8_LUT[0][crc ^ *next++];
len--;
}
//fast middle processing, 4 bytes (aligned!) per loop
while (len >= 4) {
printf("\nSlice processing");
uint32_t n = *(uint32_t *)next;
//This part should be adopted to work for CRC8
/*crc = crc8_LUT[3][(n & 0xff) ^ crc] ^
crc8_LUT[2][(n >> 8) & 0xff] ^
crc8_LUT[1][(n >> 16) & 0xff] ^
crc8_LUT[0][n >> 24]; */
uint32_t n0 = (n & 0xFF) ^ crc;
uint32_t n1 = (n >> 8) & 0xFF;
uint32_t n2 = (n >> 16) & 0xFF;
uint32_t n3 = (n >> 24);
//Working multi step for CRC 4 only using first row of LUT
uint8_t crc0 = crc8_LUT[0][crc ^ n0];
uint8_t crc1 = crc8_LUT[0][crc0 ^ n1];
uint8_t crc2 = crc8_LUT[0][crc1 ^ n2];
uint8_t crc3 = crc8_LUT[0][crc2 ^ n3];
crc = crc3;
next += 4;
len -= 4;
}
// process remaining bytes (can't be larger than 8)
while (len) {
printf("\nRemain processing");
crc = crc8_LUT[0][crc ^ *next++];
len--;
}
return crc ^ XOR_OUT;
}
我尝试将函数更改为适用于 CRC8,但我无法弄清楚中间部分。解释为各种CRC(4/8/16/24/32 ...)生成查找表的一般方法的解决方案也将受到高度赞赏。 感谢您为我指明正确的方向。
【问题讨论】: