下面的函数fpmul_by_2() 实现了所需的功能,假设'unsigned long' 是32 位整数类型,'float' 是映射到IEEE-754 'binary32' 的32 位浮点类型。进一步假设我们要在禁用异常的情况下模拟 IEEE-754 乘法,从而产生标准规定的屏蔽响应。
使用了两个帮助函数,分别实现 32 位整数加法和相等比较。加法是基于二进制加法中和和进位的定义(详细解释见this previous question),而相等比较利用了(a^b) == 0iffa == b这一事实。
浮点参数的处理需要大致区分三类操作数:非正规和零、法线、无穷大和 NaN。法线的加倍是通过增加指数来实现的,因为我们在二进制浮点格式上进行操作。可能会发生溢出,在这种情况下必须返回无穷大。 Infinity 和 NaN 返回不变,除了 SNaN 被转换为 QNaN,这是 IEEE-754 规定的屏蔽响应。非正规和零是通过将有效数字加倍来处理的。对零、次正规和无穷大的处理可能会破坏符号位,因此参数的符号位被强制用于结果。
下面的测试框架对fpmul_by_2() 进行了详尽的测试,在现代PC 上只需几分钟。我在运行 Windows 的 x64 平台上使用了 Intel 编译器。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// assumptions:
// 'unsigned long' is a 32-bit type
// 'float' maps to IEEE-754 'binary32'. Exceptions are disabled
// add using definition of sum and carry bits in binary addition
unsigned long add (unsigned long a, unsigned long b)
{
unsigned long sum, carry;
carry = b;
do {
sum = a ^ carry;
carry = (a & carry) << 1;
a = sum;
} while (carry);
return sum;
}
// return 1 if a == b, else 0
int eq (unsigned long a, unsigned long b)
{
unsigned long t = a ^ b;
// OR all bits into lsb
t = t | (t >> 16);
t = t | (t >> 8);
t = t | (t >> 4);
t = t | (t >> 2);
t = t | (t >> 1);
return ~t & 1;
}
// compute 2.0f * a
unsigned long fpmul_by_2 (unsigned long a)
{
unsigned long expo_mask = 0x7f800000UL;
unsigned long expo_lsb = 0x00800000UL;
unsigned long qnan_mark = 0x00400000UL;
unsigned long sign_mask = 0x80000000UL;
unsigned long zero = 0x00000000UL;
unsigned long r;
if (eq (a & expo_mask, zero)) { // subnormal or zero
r = a << 1; // double significand
} else if (eq (a & expo_mask, expo_mask)) { // INF, NaNs
if (eq (a & ~sign_mask, expo_mask)) { // INF
r = a;
} else { // NaN
r = a | qnan_mark; // quieten SNaNs
}
} else { // normal
r = add (a, expo_lsb); // double by bumping exponent
if (eq (r & expo_mask, expo_mask)) { // overflow
r = expo_mask;
}
}
return r | (a & sign_mask); // result has sign of argument
}
float uint_as_float (unsigned long a)
{
float r;
memcpy (&r, &a, sizeof r);
return r;
}
unsigned long float_as_uint (float a)
{
unsigned long r;
memcpy (&r, &a, sizeof r);
return r;
}
int main (void)
{
unsigned long res, ref, a = 0;
do {
res = fpmul_by_2 (a);
ref = float_as_uint (2.0f * uint_as_float (a));
if (res != ref) {
printf ("error: a=%08lx res=%08lx ref=%08lx\n", a, res, ref);
return EXIT_FAILURE;
}
a++;
} while (a);
printf ("test passed\n");
return EXIT_SUCCESS;
}