【问题标题】:Counting Words from Text File into Linked List in C将文本文件中的单词计数到 C 中的链表中
【发布时间】:2016-03-31 03:39:27
【问题描述】:

我是 C 编程新手,对于我的一个课程的作业,我们将编写可以读取文本文件的代码,将所有单词放入带有计数器的链表中,然后输出该链表到一个文本文件中。

我不确定我在下面的代码中做错了什么,它只计算了一些单词并且似乎跳过了其他单词。 (我添加了许多额外的打印来尝试查看单词丢失的位置)

然后我尝试在将所有单词添加到链接列表时将它们变成小写,但这似乎使它读的单词更少,我不知道为什么。

任何帮助/建议将不胜感激。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

typedef struct s_words {
    char *str; //word
    int count; //number of times word occurs
    struct s_words* next; //pointer to next word
} words;


words* create_words(char* word) {
    //+allocate space for the structure
    printf("%lu ", strlen(word));
    words* newWord = malloc(sizeof(words));
    if (NULL != newWord)
    {
        //+allocate space for storing the new word in "str"
        //+if str was array of fixed size, storage wud be wasted
        newWord->str = (char *)malloc((strlen(word))+1);
        strcpy(newWord->str, word); //+copy “word” into newWord->str
        newWord->str[strlen(word)]='\0';
        printf(" Create: %s ", newWord->str);
        //+initialize count to 1;
        newWord->count = 1;
        //+initialize next;
        newWord->next = NULL;                   
    }
    return newWord;
}

words* add_word(words* wordList, char* word) 
{
    int found=0;
    words *temp=wordList;
    //+ search if word exists in the list; if so, make found=1
    while (temp != NULL) 
    {

        if (strcmp(temp->str, word) == 0) 
        {  //+use strcmp command
            found=1;
            temp->count = temp->count+1; //+increment count;
            return wordList;
        }
        else 
        {
            //+update temp
            temp = temp->next;
        }
    }
    if (found==0) 
    {  //new word
        //printf("%s ", word);
        words* newWord = create_words(word);
        if (NULL != newWord) 
        {
            //+??  Insert new word at the head of the list
            newWord->next = wordList;
            printf(" NEW WORD: %s\n ", newWord->str);
        }
        return newWord;
    }
    //return wordList; //code never gets here, just added in case of error    
}




int main(int argc, char* argv[]) 
{

    words *mywords;  //+head of linked list containing words
    mywords=NULL;

    FILE *myFile;
    FILE *myOutput;

    char* filename = argv[1];
    char* outputfile = argv[2];

    myFile = fopen(filename, "r");  //+first parameter is input file
    if (myFile==0) 
    {
        printf("file not opened\n");
        return 1;
    }
    else 
    {
        printf("file opened \n");
    }

    //+start reading file character by character;
    //+when word has been detected; call the add_word function

    int ch = 0, word = 1, k = 0;
    char thisword[100];
    //ch = putchar(tolower(ch));
    //ch = fgetc(myFile);
    while ((ch = fgetc(myFile)) != EOF )
    {
        //error handling

        if (ch == '.' || ch == ' ' || ch == ',' || ch == ':' || ch == ';' || ch == '\n')  //+detect new word? Check if ch is a delimiter
        { //when above if is true, new word created in next if:
            if ( word == 1 )  //+make sure previous character was not delimiter
            {
                word = 0;
                //+make the kth character of thisword as \0
                thisword[k] = '\0';

                //+now call add_word to add thisword into the list
                printf(" Add:%s ", thisword);
                mywords = add_word(mywords, thisword);
                printf(" Added:%s\n", mywords->str);

                k=0;
            }
        }
        else
        {
            word = 1;
            //make ch lowercase
            //ch = putchar(toupper(ch));
            //+?? //make the kth character of thisword equal to ch
            thisword[k] = ch;
            thisword[k] = putchar(tolower(thisword[k]));
            k++;
        }
    }
    if (word == 1) 
    {
        thisword[k] = '\0';
        //add thisword into the list
        printf("Last Word:%s ", thisword);
        mywords = add_word(mywords, thisword);      
    }

    words *currword;
    printf("printing list\n");

    //+Traverse list and print each word and its count to outputfile
    //+output file is second parameter being passed

    myOutput = fopen(outputfile, "w+");  //+first parameter is input file
    if (myOutput == 0) 
    {
        printf("output file not opened \n");
        return 1;
    }
    else 
    {
        printf("output file opened \n");
    }

    currword = mywords;

    while (currword->next != NULL)
    {
        //add word name then word count to file, then move to next
        fprintf(myOutput, "%s %d \n", currword->str, currword->count);
        printf("%s ", currword->str);
        currword = currword->next;
    }

    return 0;

}

【问题讨论】:

  • 您必须对文件中的所有单词进行计数。这意味着单词的重复也是你必须计算的。
  • 你为什么使用面向字符的输入(例如fgetc从文件中读取),而不是面向行的输入(例如。 fgets 一次读取一行),或者至少格式化输入(fscanf 一次读取一个单词)?您可以使用fgetc 来做到这一点,但您的成功将取决于正确覆盖所有可能的单词分隔符(这会增加很多错误空间)(例如,如果一个单词被 tabs 分隔怎么办@ 987654326@?)
  • 为了确保 LETTER 是小写的(包含在标准 ascii 表中的那些),我建议用 32 按位执行 & (确保它是一个字母而不是另一个询问)。如果您不确定为什么会这样,请检查 ascii 表中 a 和 A 之间的区别。
  • 相邻的行thisword[k] = ch;thisword[k] = putchar(tolower(thisword[k]));很好奇。为什么不:thisword[k] = tolower(ch); putchar(thisword[k]);?我怀疑这是否真的是问题的一部分,但对我来说似乎更简单。
  • @Mr.Branch:与使用&lt;ctype.h&gt; 中的tolower()(可能还有isupper())函数相比,您为什么建议这样做?尤其是在 C 语言环境之外,这些功能很可能比使用 32 屏蔽更好。

标签: c file-io struct linked-list


【解决方案1】:

你肯定喜欢让自己难过...你最大的四个问题是(1)没有将mywords地址传递给add_words,(2) 未能处理 add_words 中的 New/Empty 列表情况,(3) 将新节点添加到列表头部,以及 (4 ) 每次拨打add_word 时都会覆盖您的列表地址(例如mywords = add_words...

解决每个问题并稍微整理一下解析,您应该能够在列表中找到所有单词。查看/测试以下内容:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

typedef struct s_words {
    char *str;                  //word
    int count;                  //number of times word occurs
    struct s_words *next;       //pointer to next word
} words;

words *create_words (char *word)
{
    //+allocate space for the structure
    printf ("%lu ", strlen (word));
    words *newWord = malloc (sizeof (words));
    if (NULL != newWord) {
        //+allocate space for storing the new word in "str"
        //+if str was array of fixed size, storage wud be wasted
        newWord->str = (char *) malloc ((strlen (word)) + 1);
        strcpy (newWord->str, word);    //+copy “word” into newWord->str
        newWord->str[strlen (word)] = '\0';
        printf (" Create: %s ", newWord->str);
        //+initialize count to 1;
        newWord->count = 1;
        //+initialize next;
        newWord->next = NULL;
    }
    return newWord;
}

words *add_word (words **wordList, char *word)
{
    if (!*wordList) {       /* handle EMPTY list */
        printf ("NEW LIST\n");
        return *wordList = create_words (word);
    }

    words *temp = *wordList;
    //+ search if word exists in the list; if so, make found=1
    while (temp->next != NULL) {    /* iterate while temp->next != NULL */

        if (strcmp (temp->str, word) == 0) {    //+use strcmp command
            temp->count = temp->count + 1;      //+increment count;
            return *wordList;
        }
        else
            temp = temp->next;  //+update temp
    }
    words *newWord = create_words (word);
    if (NULL != newWord) {  /* insert at TAIL of list */
        temp->next = newWord; 
        printf (" NEW WORD: %s\n ", newWord->str);
    }
    return newWord;
}

int main (int argc, char *argv[]) {

    words *mywords;             //+head of linked list containing words
    mywords = NULL;
    char *delim = ". ,:;\t\n";

    FILE *myFile;
    FILE *myOutput;

    char *filename = argv[1];
    char *outputfile = argv[2];

    if (argc != 3) {
        fprintf (stderr, "error: insufficient input. usage: %s ifile ofile\n",
                argv[0]);
        return 1;
    }

    myFile = fopen (filename, "r");     //+first parameter is input file
    if (myFile == 0) {
        printf ("file not opened\n");
        return 1;
    } else {
        printf ("file opened \n");
    }

    //+start reading file character by character;
    //+when word has been detected; call the add_word function

    int ch = 0, word = 1, k = 0;
    char thisword[100];
    while ((ch = fgetc (myFile)) != EOF) {  /* for each char    */
        if (strchr (delim, ch)) {           /* check if delim   */
            if (word == 1) {    /* if so, terminate word, reset */
                word = 0;
                thisword[k] = '\0';

                printf ("\nadd_word (mywords, %s)\n", thisword);
                /* do NOT overwrite list address each time,
                 * you must send ADDRESS of list to add_word
                 * to handle EMPTY list case.
                 */
                if (add_word (&mywords, thisword))
                    printf (" added: %s\n", mywords->str);
                else
                    fprintf (stderr, "error: add_word failed.\n");

                k = 0;
            }
        }
        else {  /* if not delim, add char to string, set word 1 */
            word = 1;
            thisword[k++] = tolower (ch);   /* make ch lowercase */
        }
    }
    if (word == 1) {    /* handle non-POSIX line-end */
        thisword[k] = '\0';
        //add thisword into the list
        printf ("\nadd_word (mywords, %s) (last)\n", thisword);
        if (add_word (&mywords, thisword))  /* same comment as above */
            printf (" added: %s\n", mywords->str);
        else
            fprintf (stderr, "error: add_word failed.\n");
    }

    words *currword;
    printf ("printing list\n");

    //+Traverse list and print each word and its count to outputfile
    //+output file is second parameter being passed

    myOutput = fopen (outputfile, "w+");        //+first parameter is input file
    if (myOutput == 0) {
        printf ("output file not opened \n");
        return 1;
    } else {
        printf ("output file opened \n");
    }

    currword = mywords;

    while (currword != NULL) {  /* just test currword here */
        //add word name then word count to file, then move to next
        fprintf (myOutput, "%s %d \n", currword->str, currword->count);
        printf ("%s ", currword->str);
        currword = currword->next;
    }

    putchar ('\n');
    return 0;
}

输入文件

$ cat ../dat/captnjack.txt
This is a tale
Of Captain Jack Sparrow
A Pirate So Brave
On the Seven Seas.

测试使用

$ ./bin/llwordcount ../dat/captnjack.txt dat/llout.txt

输出文件

$ cat dat/llout.txt
this 1
is 1
a 2
tale 1
of 1
captain 1
jack 1
sparrow 1
pirate 1
so 1
brave 1
on 1
the 1
seven 1
seas 1

注意:对于打印/输出,您只需要while (currword != NULL) 遍历列表。

话虽如此,您确实应该考虑使用面向行的输入(fgetsgetline)并将每行数据解析为单词,而不是逐行读取 -character 并寻找分隔符。一次读取/解析一行更容易,更不容易出错。由于面向行的输入被缓冲,因此读取速度也快得多。您可以一次读取一个字符,只是速度较慢,而且沿途还有很多陷阱。

消化更改(上面用/* ... */ 评论),如果您有任何问题,请告诉我。

【讨论】:

  • 感谢您的帮助!我添加了几个分隔符并用几个文本文件对其进行了测试,并且每次都有效。
  • 很高兴我能帮上忙。对于您的第一个列表,您离我们不远了。祝你编码顺利。
猜你喜欢
  • 2014-11-05
  • 1970-01-01
  • 2020-10-04
  • 1970-01-01
  • 2013-05-05
  • 1970-01-01
  • 1970-01-01
  • 2015-07-13
  • 1970-01-01
相关资源
最近更新 更多