我正在尝试逐行读取包含“Jane 30”、“Chris 40”等字符串的文件中的输入
您正在读取的文件可能包含“Jane 30”、“Chris 40”等字符串,但也可能包含数以百万计的拼写错误和/或其他错误。因此,您需要检测这些错误,并清楚地告诉用户错误是什么,以便他们能够轻松理解问题、定位错误并加以修复。
因此,没有任何 C 库函数有用。
正确的做法是将解析器构建为有限状态机。例如(未经测试):
/*
 * Finite-state-machine parser for lines of the form "<name> <number>".
 *
 * Reads `file` one character at a time with fgetc().  Each time a complete
 * line has been recognised, create_new_entry(current_name, name_length,
 * number) is called; current_name is NOT NUL-terminated, its length is
 * passed explicitly.  On the first malformed input a diagnostic naming the
 * line and column is printed and NOT_OK is returned.  OK is returned when
 * end of input is reached at the start of a line.
 */

// Parser states
enum {
    PARSE_LINE_START,     /* At start of new line */
    PARSE_IN_NAME,        /* In middle of name */
    PARSE_AFTER_NAME,     /* Between name and number */
    PARSE_IN_NUMBER,      /* In middle of number */
    PARSE_TRAILING_SPACE  /* Trailing white space before end of line */
};

// State
int state = PARSE_LINE_START;
int column = 0;
int line = 1;
char current_name[MAX_NAME_LENGTH];
int name_length = 0;
int number = 0;

// Main loop
for (;;) {
    int c = fgetc(file);
    column++;

    switch (state) {
    case PARSE_LINE_START:
        if (c == EOF) {
            return OK;
        } else if (isdigit(c)) {
            printf("ERROR: Number found at start of line (missing name), on line %d at column %d\n", line, column);
            return NOT_OK;
        } else if (isalpha(c)) {
            name_length = 0;
            current_name[name_length++] = c;
            state = PARSE_IN_NAME;
        } else if (c == '\n') {
            /* Blank line: move on and restart column counting. */
            line++;
            column = 0;
        } else if (isspace(c)) {
            /* Leading white space is skipped. */
        } else {
            printf("ERROR: Bad character at start of line, on line %d at column %d\n", line, column);
            return NOT_OK;
        }
        break;

    case PARSE_IN_NAME:
        if (c == EOF) {
            printf("ERROR: File ends in the middle of a name, on line %d at column %d\n", line, column);
            return NOT_OK;
        } else if (isdigit(c)) {
            printf("ERROR: No whitespace between name and number, on line %d at column %d\n", line, column);
            return NOT_OK;
        } else if (isalpha(c)) {
            /* Bounds check before storing so current_name never overflows. */
            if (name_length >= MAX_NAME_LENGTH) {
                printf("ERROR: Name too long (max length is %d), on line %d at column %d\n", MAX_NAME_LENGTH, line, column);
                return NOT_OK;
            }
            current_name[name_length++] = c;
        } else if (c == '\n') {
            /* Must be tested before isspace(), which also matches '\n'. */
            printf("ERROR: No number after name, on line %d at column %d\n", line, column);
            return NOT_OK;
        } else if (isspace(c)) {
            state = PARSE_AFTER_NAME;
        } else {
            printf("ERROR: Bad character in middle of name, on line %d at column %d\n", line, column);
            return NOT_OK;
        }
        break;

    case PARSE_AFTER_NAME:
        if (c == EOF) {
            printf("ERROR: File ends after name, on line %d at column %d\n", line, column);
            return NOT_OK;
        } else if (isdigit(c)) {
            number = c - '0';
            state = PARSE_IN_NUMBER;
        } else if (c == '\n') {
            printf("ERROR: No number after name, on line %d at column %d\n", line, column);
            return NOT_OK;
        } else if (isspace(c)) {
            /* Additional separator white space is skipped. */
        } else {
            printf("ERROR: Bad character after name, on line %d at column %d\n", line, column);
            return NOT_OK;
        }
        break;

    case PARSE_IN_NUMBER:
        if (c == EOF) {
            printf("ERROR: File ends in middle of number, on line %d at column %d\n", line, column);
            return NOT_OK;
        } else if (isdigit(c)) {
            /* Check before each step so the signed arithmetic never overflows. */
            if (number > INT_MAX / 10) {
                printf("ERROR: Number is too large, on line %d at column %d\n", line, column);
                return NOT_OK;
            }
            number *= 10;
            if (number > INT_MAX - (c - '0')) {
                printf("ERROR: Number is too large, on line %d at column %d\n", line, column);
                return NOT_OK;
            }
            number += c - '0';
        } else if (c == '\n') {
            create_new_entry(current_name, name_length, number);
            line++;
            column = 0;
            state = PARSE_LINE_START;
        } else if (isspace(c)) {
            state = PARSE_TRAILING_SPACE;
        } else {
            printf("ERROR: Bad character after number, on line %d at column %d\n", line, column);
            return NOT_OK;
        }
        break;

    case PARSE_TRAILING_SPACE:
        if (c == EOF) {
            printf("ERROR: File ends between number and end of line, on line %d at column %d\n", line, column);
            return NOT_OK;
        } else if (c == '\n') {
            create_new_entry(current_name, name_length, number);
            line++;
            column = 0;
            state = PARSE_LINE_START;
        } else if (isspace(c)) {
            /* More trailing white space is skipped. */
        } else {
            printf("ERROR: Unknown characters between number and end of line, on line %d at column %d\n", line, column);
            return NOT_OK;
        }
        break;

    default:
        /* Unreachable unless state is corrupted; fail rather than spin. */
        return NOT_OK;
    }
}