您的getch() 和ungetch() 代码看起来大致正确(我没有测试它们,但它们看起来正确)。如果您要报告错误,最好在stderr 报告错误,使用类似于:
fprintf(stderr, "ungetch: too many characters (could not push back %c)\n", c);
您也可以考虑改进 ungetch(),让它返回成功/失败指示,而不是什么都不返回,并把打印语句从函数中去掉;这样调用代码就可以自行决定如何报告错误。
ungets() 函数似乎在压入一个计数器值,而不是一个字符。您还应该避免在循环条件下调用strlen();它将线性算法转换为二次算法。
/*
** ungets: push every character of s back onto the input, walking the
** string from its first character up to (not including) the terminator.
** The result of each ungetch() call is ignored.
*/
void ungets(const char *s)
{
    for (const char *p = s; *p != '\0'; p++)
        ungetch(*p);
}
const 向调用者保证该函数不会修改传递的字符串。这段代码继续往回推直到字符串的结尾,因为ungetch() 没有告诉它是时候停止了。传入一个 20 KiB 的字符串,它会生成 19.9 KiB 的消息;可能不是一个好主意。
Henrik Carlqvist 提出了一个敏锐的观察:
要考虑的另一件事是ungets 字符串中的字符存储在缓冲区中的顺序以及它们被读出的顺序。由于当前的 getch 函数以相反的顺序将字符作为堆栈读出,因此您可能希望将字符串存储在该堆栈上,最右边的字符在前,最左边的字符在最后。
当然,Henrik 是对的。所以,ungets() 中的代码需要升级,并且需要字符串长度——或者至少必须找到字符串的结尾。
我将假设 ungetch() 函数有一个返回 int 的变体:当缓冲区中没有空间时返回 EOF,成功时返回其他某个值(零或被推回的字符,代码并不关心是哪一个),这样循环就可以在必要时提前终止。
ungets() 功能也将升级,以报告 EOF 错误或回退的字符数。
/*
** ungets: push the string s back onto the input, last character first.
** Returns EOF as soon as ungetch() reports EOF, otherwise the length
** of s.
*/
int ungets(const char *s)
{
    const int len = (int)strlen(s);
    int remaining = len;

    while (remaining > 0)
    {
        remaining--;
        if (ungetch(s[remaining]) == EOF)
            return EOF;
    }
    return len;
}
我编写了如图所示的循环,因此如果将int 更改为size_t(无符号类型),它仍然可以正常工作。只要使用有符号整数类型,就可以改用这个循环:
for (int i = len - 1; i >= 0; i--)
{
if (ungetch(s[i]) == EOF)
return EOF;
}
或者使用while循环:
size_t len = strlen(s);
while (len > 0)
{
if (ungetch(s[--len]) == EOF)
return EOF;
}
使用有符号整数更简单,但如果数据大小足够大(2 GiB 或更大),则可能更容易出现整数溢出问题。这通常不是实际问题。如果是,您会知道问题并相应地编写代码。
tinky_winky 问:
你能提供一个测试用例吗?
我想可以。这就是我最终得到的结果。其中的类型转换(cast)比我希望的要多,但它可以在我默认的严格编译选项下干净地编译(源文件 ugcs.c):
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define DIM(x) (sizeof(x)/sizeof((x)[0]))
extern int getch(void);
extern int ungetch(int c);
extern int ungets_0(const char *s);
extern int ungets_1(const char *s);
extern int ungets_2(const char *s);
extern int ungets_3(const char *s);
extern int ungets_4(const char *s);
/* Implementation */
enum { BUFSIZE = 100 };
static char buf[BUFSIZE];   /* pushback stack: last character pushed is read first */
static int bufp = 0;        /* next free position in buf */

/*
** getch: get a (possibly pushed back) character.
**
** Returns the most recently pushed-back character when the pushback
** buffer is not empty, otherwise whatever getchar() returns.
**
** The stored character is converted through unsigned char so that the
** result is never negative.  Without that, on a platform where plain
** char is signed, a pushed-back byte of 0xFF would come back as -1 and
** be indistinguishable from EOF.
*/
int getch(void)
{
    assert(bufp >= 0 && bufp <= (int)sizeof(buf));
    if (bufp > 0)
        return (unsigned char)buf[--bufp];
    return getchar();
}

/*
** ungetch: push character back on input.
**
** Returns EOF when the pushback buffer is full (nothing is stored);
** otherwise stores c and returns it converted to unsigned char.
**
** The conversion matters for callers that pass a plain char such as
** s[i]: where char is signed, the byte 0xFF arrives here as -1, and
** returning it unchanged would look like the EOF failure indication
** even though the character was pushed.
*/
int ungetch(int c)
{
    if (bufp >= (int)sizeof(buf))
        return EOF;
    buf[bufp++] = (char)c;
    return (unsigned char)c;
}
/*
** dump_pushback: print the number of pushed-back characters and the
** current contents of the pushback buffer on standard output.
*/
static void dump_pushback(void)
{
    const int pending = bufp;

    printf("PBB: L = %d: [%.*s]\n", pending, pending, buf);
}
/* Viable but quadratic because of repeated strlen() */
/* Pushes characters in wrong order */
/*
** ungets_0: push s back onto the input, first character first.
**
** Kept deliberately as the "how not to do it" reference: strlen() is
** re-evaluated on every iteration and the characters are pushed in
** forward order.
**
** Returns EOF if ungetch() reports EOF, otherwise 0.  The pushback
** buffer is dumped before returning in both cases.
*/
int ungets_0(const char *s)
{
    for (int i = 0; i < (int)strlen(s); i++)
    {
        /* The character must be pushed, not the loop index. */
        if (ungetch(s[i]) == EOF)
        {
            /* Report the character that could not be pushed. */
            fprintf(stderr, "ungetch() failed on %d '%c'\n", s[i], s[i]);
            dump_pushback();
            return EOF;
        }
    }
    dump_pushback();
    return 0;
}
/* One workable solution */
/*
** ungets_1: push s back onto the input, last character first.
** Returns EOF if ungetch() reports EOF, otherwise the length of s.
** The pushback buffer is dumped before returning in both cases.
*/
int ungets_1(const char *s)
{
    const int total = (int)strlen(s);
    int remaining = total;

    while (remaining > 0)
    {
        const char ch = s[--remaining];

        if (ungetch(ch) != EOF)
            continue;
        fprintf(stderr, "ungetch() failed on %d '%c'\n", ch, ch);
        dump_pushback();
        return EOF;
    }
    dump_pushback();
    return total;
}
/* Using size_t instead of int */
/*
** ungets_2: push s back onto the input, last character first, counting
** with an unsigned index.  The index stays one above the element being
** pushed, so the loop test never depends on a value below zero.
** Returns EOF if ungetch() reports EOF, otherwise the length of s.
** The pushback buffer is dumped before returning in both cases.
*/
int ungets_2(const char *s)
{
    const size_t total = strlen(s);

    for (size_t above = total; above != 0; --above)
    {
        const char ch = s[above - 1];

        if (ungetch(ch) == EOF)
        {
            fprintf(stderr, "ungetch() failed on %d '%c'\n", ch, ch);
            dump_pushback();
            return EOF;
        }
    }
    dump_pushback();
    return (int)total;
}
/* Only works with signed int */
/*
** ungets_3: push s back onto the input, last character first.
**
** The index runs from len - 1 down to 0 and the loop ends when it
** becomes negative, which is why the index type must be signed.
**
** Returns EOF if ungetch() reports EOF, otherwise the length of s.
** The pushback buffer is dumped before returning in both cases.
*/
int ungets_3(const char *s)
{
    int len = (int)strlen(s);
    for (int i = len - 1; i >= 0; i--)
    {
        if (ungetch(s[i]) == EOF)
        {
            /* Report the character that failed; s[len] is only the terminator. */
            fprintf(stderr, "ungetch() failed on %d '%c'\n", s[i], s[i]);
            dump_pushback();
            return EOF;
        }
    }
    dump_pushback();
    return len;
}
/* Using size_t and a while loop */
/*
** ungets_4: push s back onto the input, last character first.
**
** Returns EOF if ungetch() reports EOF, otherwise the length of s.
** The pushback buffer is dumped before returning in both cases.
*/
int ungets_4(const char *s)
{
    const size_t len = strlen(s);
    size_t pos = len;   /* counts down; len itself is kept for the return value */

    while (pos > 0)
    {
        if (ungetch(s[--pos]) == EOF)
        {
            /* pos was already decremented, so s[pos] is the character that failed */
            fprintf(stderr, "ungetch() failed on %d '%c'\n", s[pos], s[pos]);
            dump_pushback();
            return EOF;
        }
    }
    dump_pushback();
    return (int)len;
}
/*
** ungetters: a constant array of pointers to functions; each function
** returns an int and takes a constant char pointer argument. In theory,
** the typedef isn't necessary. In practice, it saves your sanity.
**
** The entries are listed in numeric order, so ungetters[n] is ungets_n;
** main() indexes this table with the number given on the command line
** and prints that number as the name of the function in use.
*/
typedef int (*UngetStr)(const char *);
static const UngetStr ungetters[] =
{
ungets_0, ungets_1, ungets_2,
ungets_3, ungets_4
};
/*
** Test driver.  Reads up to 40 characters through getch(), printing each
** one, and pushes input back at fixed intervals so that both ungetch()
** and the selected ungets_N() function are exercised.  Whatever remains
** is then read and printed until EOF.
**
** argv[1], if present and in the range 0..DIM(ungetters)-1 after atoi(),
** selects the ungets_N() variant; otherwise ungets_1() is used.
** Always returns 0.
*/
int main(int argc, char **argv)
{
UngetStr ungets = ungets_1;
int index;
/* index doubles as the parsed argument here and as the fill level of buffer below */
if (argc > 1 && (index = atoi(argv[1])) >= 0 && index < (int)DIM(ungetters))
{
printf("Using function ungets_%d\n", index);
ungets = ungetters[index];
}
/* buffer records the characters read since the last string pushback */
char buffer[32];
index = 0;
int i;
int c;
for (i = 0; i < 40; i++)
{
if ((c = getch()) == EOF)
{
printf("%d: got EOF\n", i);
break;
}
/* non-printable characters are shown as '.' */
printf("%d: got %3d '%c'\n", i, c, (isprint(c) ? c : '.'));
buffer[index++] = c;
/* every third read: push the character just read straight back */
if (i % 3 == 2)
{
printf("%d: ungetting %3d '%c'\n", i, c, (isprint(c) ? c : '.'));
ungetch(c);
}
/* only reached when the single-character branch above did not fire */
else if (i % 7 == 6)
{
/* truncate to the first half of what was recorded, push that back, start recording afresh */
buffer[index/2] = '\0';
printf("%d: ungetting string [%s]\n", i, buffer);
ungets(buffer);
index = 0;
}
}
/* drain whatever is left, continuing the same line numbering */
while ((c = getch()) != EOF)
printf("%d: got %3d '%c'\n", i++, c, (isprint(c) ? c : '.'));
return 0;
}
编译(在 Mac OS X 10.10.5 上使用 GCC 5.1.0):
$ gcc -O3 -g -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
> -Wold-style-definition -Werror ugcs.c -o ugcs
$
示例数据文件 (data):
abcdefg
ABCDEFGHIJKL
ugcs < data 的示例输出:
0: got 97 'a'
1: got 98 'b'
2: got 99 'c'
2: ungetting 99 'c'
3: got 99 'c'
4: got 100 'd'
5: got 101 'e'
5: ungetting 101 'e'
6: got 101 'e'
6: ungetting string [abc]
PBB: L = 3: [cba]
7: got 97 'a'
8: got 98 'b'
8: ungetting 98 'b'
9: got 98 'b'
10: got 99 'c'
11: got 102 'f'
11: ungetting 102 'f'
12: got 102 'f'
13: got 103 'g'
13: ungetting string [abb]
PBB: L = 3: [bba]
14: got 97 'a'
14: ungetting 97 'a'
15: got 97 'a'
16: got 98 'b'
17: got 98 'b'
17: ungetting 98 'b'
18: got 98 'b'
19: got 10 '.'
20: got 65 'A'
20: ungetting 65 'A'
21: got 65 'A'
22: got 66 'B'
23: got 67 'C'
23: ungetting 67 'C'
24: got 67 'C'
25: got 68 'D'
26: got 69 'E'
26: ungetting 69 'E'
27: got 69 'E'
27: ungetting string [aabbb
A]
PBB: L = 7: [A
bbbaa]
28: got 97 'a'
29: got 97 'a'
29: ungetting 97 'a'
30: got 97 'a'
31: got 98 'b'
32: got 98 'b'
32: ungetting 98 'b'
33: got 98 'b'
34: got 98 'b'
34: ungetting string [aaa]
PBB: L = 5: [A
aaa]
35: got 97 'a'
35: ungetting 97 'a'
36: got 97 'a'
37: got 97 'a'
38: got 97 'a'
38: ungetting 97 'a'
39: got 97 'a'
40: got 10 '.'
41: got 65 'A'
42: got 70 'F'
43: got 71 'G'
44: got 72 'H'
45: got 73 'I'
46: got 74 'J'
47: got 75 'K'
48: got 76 'L'
49: got 10 '.'
$
测试所有算法:
$ for i in $(seq 0 4); do ugcs $i < data > ugcs-$i.out; done
$ ls -l ugcs-?.out
-rw-r--r-- 1 jleffler staff 1286 Aug 16 14:42 ugcs-0.out
-rw-r--r-- 1 jleffler staff 1286 Aug 16 14:42 ugcs-1.out
-rw-r--r-- 1 jleffler staff 1286 Aug 16 14:42 ugcs-2.out
-rw-r--r-- 1 jleffler staff 1286 Aug 16 14:42 ugcs-3.out
-rw-r--r-- 1 jleffler staff 1286 Aug 16 14:42 ugcs-4.out
$ diff ugcs-1.out ugcs-2.out
1c1
< Using function ungets_1
---
> Using function ungets_2
$ diff ugcs-1.out ugcs-3.out
1c1
< Using function ungets_1
---
> Using function ungets_3
$ diff ugcs-1.out ugcs-4.out
1c1
< Using function ungets_1
---
> Using function ungets_4
$ diff ugcs-0.out ugcs-4.out
1c1
< Using function ungets_0
---
> Using function ungets_4
12,13c12,13
< PBB: L = 3: [abc]
< 7: got 99 'c'
---
> PBB: L = 3: [cba]
> 7: got 97 'a'
17c17
< 10: got 97 'a'
---
> 10: got 99 'c'
22,26c22,26
< 13: ungetting string [cbb]
< PBB: L = 3: [cbb]
< 14: got 98 'b'
< 14: ungetting 98 'b'
< 15: got 98 'b'
---
> 13: ungetting string [abb]
> PBB: L = 3: [bba]
> 14: got 97 'a'
> 14: ungetting 97 'a'
> 15: got 97 'a'
28,30c28,30
< 17: got 99 'c'
< 17: ungetting 99 'c'
< 18: got 99 'c'
---
> 17: got 98 'b'
> 17: ungetting 98 'b'
> 18: got 98 'b'
43c43
< 27: ungetting string [bbbcc
---
> 27: ungetting string [aabbb
45,54c45,54
< PBB: L = 7: [bbbcc
< A]
< 28: got 65 'A'
< 29: got 10 '.'
< 29: ungetting 10 '.'
< 30: got 10 '.'
< 31: got 99 'c'
< 32: got 99 'c'
< 32: ungetting 99 'c'
< 33: got 99 'c'
---
> PBB: L = 7: [A
> bbbaa]
> 28: got 97 'a'
> 29: got 97 'a'
> 29: ungetting 97 'a'
> 30: got 97 'a'
> 31: got 98 'b'
> 32: got 98 'b'
> 32: ungetting 98 'b'
> 33: got 98 'b'
56,70c56,67
< 34: ungetting string [A
<
< ]
< PBB: L = 5: [bbA
<
< ]
< 35: got 10 '.'
< 35: ungetting 10 '.'
< 36: got 10 '.'
< 37: got 10 '.'
< 38: got 65 'A'
< 38: ungetting 65 'A'
< 39: got 65 'A'
< 40: got 98 'b'
< 41: got 98 'b'
---
> 34: ungetting string [aaa]
> PBB: L = 5: [A
> aaa]
> 35: got 97 'a'
> 35: ungetting 97 'a'
> 36: got 97 'a'
> 37: got 97 'a'
> 38: got 97 'a'
> 38: ungetting 97 'a'
> 39: got 97 'a'
> 40: got 10 '.'
> 41: got 65 'A'
$