【发布时间】:2015-11-15 15:05:35
【问题描述】:
我写了一个 C 函数来检查两个给定的字符串(C 风格)是否是字谜。我尝试使用 Frama-C 对其进行验证,但它无法验证函数的最终行为(其他规范有效)。 第一个超时(即使在 WP 中有非常高的超时值),第二个是未知的。
代码如下:
#include <string.h>
//@ ghost char alphabet[26] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
/*@
// Takes a character and return it to lowercase if it's uppercase
axiomatic ToLower
{
logic char to_lower(char c);
axiom lowercase:
\forall char c; 97 <= c <= 122 ==> to_lower(c) == c;
axiom uppercase:
\forall char c; 65 <= c <= 90 ==> to_lower(c) == to_lower((char) (c+32));
}
*/
/*@
// Count the occurences of character 'c' into 'string' that is long 'n' characters
axiomatic CountChar
{
logic integer count_char(char* string, integer n, char c);
axiom count_zero:
\forall char* string, integer n, char c; n <= 0 ==>
count_char(string, n, c) == 0;
axiom count_hit:
\forall char* string, integer n, char c; n >= 0 && to_lower(string[n]) == c ==>
count_char(string, n+1, c) == count_char(string, n, c) + 1;
axiom count_miss:
\forall char* string, integer n, char c; n >= 0 && to_lower(string[n]) != c ==>
count_char(string, n+1, c) == count_char(string, n, c);
}
*/
/*@
predicate are_anagrams{L}(char* s1, char* s2) = ( \forall integer i; 0 <= i < 26 ==>
count_char(s1, strlen(s1), alphabet[i]) == count_char(s2, strlen(s2), alphabet[i]) );
*/
/*@
requires valid_string(a);
requires valid_string(b);
// Requires that strings 'a' and 'b' are composed only by alphabet's letters and that are long equally.
requires \forall integer k; 0 <= k < strlen(a) ==> 65 <= a[k] <= 90 || 97 <= a[k] <= 122;
requires \forall integer k; 0 <= k < strlen(b) ==> 65 <= b[k] <= 90 || 97 <= b[k] <= 122;
requires strlen(a) == strlen(b);
ensures 0 <= \result <= 1;
assigns \nothing;
behavior anagrams:
assumes are_anagrams(a, b);
ensures \result == 1;
behavior not_anagrams:
assumes !are_anagrams(a, b);
ensures \result == 0;
complete behaviors anagrams, not_anagrams;
disjoint behaviors anagrams, not_anagrams;
*/
int check_anagram(const char a[], const char b[])
{
// Create two arrays and initialize them to zero
int first[26];
int second[26];
int c;
/*@
loop assigns first[0..(c-1)];
loop assigns second[0..(c-1)];
loop assigns c;
loop invariant 0 <= c <= 26;
loop invariant \forall integer k; 0 <= k < c ==> second[k] == first[k];
loop invariant \forall integer k; 0 <= k < c ==> first[k] == 0 && second[k] == 0;
loop invariant \valid(first+(0..25)) && \valid(second+(0..25));
loop variant 26-c;
*/
for(c = 0; c < 26; c++)
{
first[c] = 0;
second[c] = 0;
}
char tmp = 'a';
c = 0;
// Now increment the array position related to position of character occured in the alphabet, subtracting ASCII decimal value of character from the character.
/*@
loop assigns first[0..25];
loop assigns tmp;
loop assigns c;
loop invariant 97 <= tmp <= 122;
loop invariant \valid(first+(0..25));
loop invariant strlen(\at(a, Pre)) == strlen(\at(a, Here));
loop invariant 0 <= c <= strlen(a);
loop variant strlen(a)-c;
*/
while (a[c] != '\0')
{
// This is a little trick to lowercase if the char is uppercase.
tmp = (a[c] > 64 && a[c] < 91) ? a[c]+32 : a[c];
first[tmp-97]++;
c++;
}
c = 0;
// Doing the same thing on second string.
/*@
loop assigns second[0..25];
loop assigns tmp;
loop assigns c;
loop invariant 97 <= tmp <= 122;
loop invariant \valid(second+(0..25));
loop invariant strlen(\at(b, Pre)) == strlen(\at(b, Here));
loop invariant 0 <= c <= strlen(b);
loop variant strlen(b)-c;
*/
while (b[c] != '\0')
{
tmp = (b[c] > 64 && b[c] < 91) ? b[c]+32 : b[c];
second[tmp-'a']++;
c++;
}
// And now compare the arrays containing the number of occurences to determine if strings are anagrams or not.
/*@
loop invariant strlen(\at(a, Pre)) == strlen(\at(a, Here));
loop invariant strlen(\at(b, Pre)) == strlen(\at(b, Here));
loop invariant 0 <= c <= 26;
loop assigns c;
loop variant 26-c;
*/
for (c = 0; c < 26; c++)
{
if (first[c] != second[c])
return 0;
}
return 1;
}
【问题讨论】:
-
我不知道框架,但
loop invariant 0 <= c <= 26;和for(c = 0; c < 26; c++)感觉不对:一个说 0...26,另一个说 0...25。另外,上面几行first和second的“声明”(?)应该是0..(c-1)还是0..25? -
@TripeHound 每次迭代的不变量都必须为真,包括因条件变为假而退出循环的那个。对于 for 循环
for(c = 0; c < 26; c++) …,不变量0 ≤ c ≤ 26是正确的。循环退出时保证的属性是“Invariant and not Condition”,在这种情况下是“0 ≤ c ≤ 26 而不是 c -
@TripeHound
loop assigns first[0..(c-1)];也是正确的:在任何迭代开始时,只有数组first的元素 0 到 c-1 已被循环分配:blog.frama-c.com/index.php?post/2010/10/06/…跨度> -
@PascalCuoq 谢谢你……这[差不多]是有道理的。
标签: c static-analysis anagram frama-c