【发布时间】:2016-10-09 22:19:29
【问题描述】:
我正在尝试用 trie(前缀树)实现拼写检查,但我添加到 trie 中的单词似乎并没有真正被存进去。泄漏出在哪里?
我花了几个小时使用调试器并单步执行我的代码...
主函数(main):
/**
* Implements a spell-checker.
*/
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "dictionary.h"
#undef calculate
#undef getrusage
// default dictionary
#define DICTIONARY "dictionaries/large"
// prototype
double calculate(const struct rusage *b, const struct rusage *a);
/**
 * Spell-checks a text file against a dictionary and prints benchmarks.
 *
 * Usage: speller [dictionary] text
 * With one argument the default DICTIONARY is used; with two, the first
 * names the dictionary and the second the text.
 *
 * Returns 0 on success, or 1 on a usage error or when the dictionary or
 * text cannot be loaded, read or unloaded.
 *
 * Note: a word is only checked once a following non-word character has been
 * read, so a word that ends exactly at end-of-file is not checked.
 */
int main(int argc, char *argv[])
{
    // check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: speller [dictionary] text\n");
        return 1;
    }

    // structs for timing data
    struct rusage before, after;

    // benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // determine dictionary to use
    char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // calculate time to load dictionary
    time_load = calculate(&before, &after);

    // try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *fp = fopen(text, "r");
    if (fp == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH+1];

    // spell-check each word in text
    for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
    {
        // allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // append character to word
            word[index] = c;
            index++;

            // ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // consume remainder of alphabetical string
                while ((c = fgetc(fp)) != EOF && isalpha(c));

                // prepare for new word
                index = 0;
            }
        }

        // ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // consume remainder of alphanumeric string
            while ((c = fgetc(fp)) != EOF && isalnum(c));

            // prepare for new word
            index = 0;
        }

        // we must have found a whole word
        else if (index > 0)
        {
            // terminate current word
            word[index] = '\0';

            // update counter
            words++;

            // check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // update benchmark
            time_check += calculate(&before, &after);

            // print word if misspelled
            if (misspelled)
            {
                printf("%s\n", word);
                misspellings++;
            }

            // prepare for next word
            index = 0;
        }
    }

    // check whether there was an error
    if (ferror(fp))
    {
        fclose(fp);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // close text
    fclose(fp);

    // determine dictionary's size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // report benchmarks (n is unsigned, so it is printed with %u)
    printf("\nWORDS MISSPELLED: %d\n", misspellings);
    printf("WORDS IN DICTIONARY: %u\n", n);
    printf("WORDS IN TEXT: %d\n", words);
    printf("TIME IN load: %.2f\n", time_load);
    printf("TIME IN check: %.2f\n", time_check);
    printf("TIME IN size: %.2f\n", time_size);
    printf("TIME IN unload: %.2f\n", time_unload);
    printf("TIME IN TOTAL: %.2f\n\n",
           time_load + time_check + time_size + time_unload);

    // that's all folks
    return 0;
}
/**
 * Computes the CPU time, in seconds, that elapsed between two resource-usage
 * snapshots: b (taken before) and a (taken after). User time and system time
 * are both counted. Yields 0.0 when either snapshot is missing.
 */
double calculate(const struct rusage *b, const struct rusage *a)
{
    // without both snapshots there is nothing to measure
    if (a == NULL || b == NULL)
    {
        return 0.0;
    }

    const struct timeval *user_before = &b->ru_utime;
    const struct timeval *user_after = &a->ru_utime;
    const struct timeval *sys_before = &b->ru_stime;
    const struct timeval *sys_after = &a->ru_stime;

    // work in whole microseconds, converting to seconds only at the very end
    return (((user_after->tv_sec * 1000000 + user_after->tv_usec) -
             (user_before->tv_sec * 1000000 + user_before->tv_usec)) +
            ((sys_after->tv_sec * 1000000 + sys_after->tv_usec) -
             (sys_before->tv_sec * 1000000 + sys_before->tv_usec)))
           / 1000000.0;
}
检查单词是否在 trie 中的函数:
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include "dictionary.h"
int dictionary_size;
/**
 * Reports whether a word is stored in the trie, given the node for its
 * first character.
 *
 * The first character of word counts as already consumed: next must be the
 * node reached by following word[0] from the root, and matching resumes at
 * word[1]. The word is expected to be lower case.
 *
 * Returns true only if every remaining character leads to an existing node
 * and the final node is flagged as a word. Returns false for a NULL or empty
 * word, a NULL node, or any character other than 'a'..'z' and the apostrophe.
 */
bool find(char *word, node *next)
{
    // an empty word has no first character to have been consumed
    if (word == NULL || word[0] == '\0')
    {
        return false;
    }

    // word[0] is already represented by next, so start at word[1]
    for (word++; next != NULL; word++)
    {
        // end of word: it is present only if this node terminates a word
        if (*word == '\0')
        {
            return next->is_word;
        }

        // slots 0..25 hold 'a'..'z'; slot 26 holds the apostrophe
        int slot;
        if (*word == '\'')
        {
            slot = 26;
        }
        else if (*word >= 'a' && *word <= 'z')
        {
            slot = *word - 'a';
        }
        else
        {
            // no slot exists for this character, so the word cannot be stored
            return false;
        }

        next = next->children[slot];
    }

    // ran off the trie before the word ended
    return false;
}
/**
 * Returns true if word is in dictionary else false.
 *
 * The comparison is case-insensitive: word is lower-cased into a local
 * buffer before the lookup. Returns false when no trie is loaded, when word
 * is NULL or empty, when it is too long for the buffer, or when its first
 * character is neither a letter nor an apostrophe.
 */
bool check(const char *word)
{
    // buffer size kept from the original implementation
    char w[47];

    if (root == NULL || word == NULL || word[0] == '\0')
    {
        return false;
    }

    // a word that does not fit cannot be checked without overflowing w
    size_t len = strlen(word);
    if (len >= sizeof w)
    {
        return false;
    }

    // lower-cased copy; the cast keeps tolower() defined for negative chars
    for (size_t i = 0; i < len; i++)
    {
        w[i] = (char) tolower((unsigned char) word[i]);
    }
    w[len] = '\0';

    // slot of the first letter below the root: 0..25 for 'a'..'z', 26 for '\''
    int first;
    if (w[0] == '\'')
    {
        first = 26;
    }
    else if (w[0] >= 'a' && w[0] <= 'z')
    {
        first = w[0] - 'a';
    }
    else
    {
        return false;
    }

    // find() treats w[0] as consumed, so give it the root's child for that
    // letter rather than the root itself
    return find(w, root->children[first]);
}
将字典中的单词加载到 trie 中的函数(我猜问题就出在这里?):
struct node *newNode(char *word, node *next)
{
next = malloc(sizeof(node));
word++;
if (word[0] == '\0')
{
next->is_word = true;
return next;
}
else
{
node *new_node = NULL;
// go to this nodes choild that is now equal to the first letter of the word
if (word[0] == '\'')
{
return next->children[26] = newNode(word, new_node);
}
else
{
return next->children[word[0] - 97] = newNode(word, new_node);
}
}
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
dictionary_size = 0;
FILE *dic = fopen(dictionary, "r");
if (dic == NULL)
{
return false;
}
// Initalize root node
root = malloc(sizeof(node));
//get word from dictinary
int ch;
while ((ch = fgetc(dic)) != EOF)
{
char word[47];
fgets(word, 47, dic);
dictionary_size++;
// make sure it's all lower case
for (int i = 0; i < 47; i++)
{
word[i] = tolower(word[i]);
}
// get rid of new line char
char *pos;
if ((pos = strchr(word, '\n')) != NULL)
{
*pos = '\0';
}
printf("%s\n", word);
//go to root nodes child that is equal to the first letter of the word
node *child_node = NULL;
root->children[word[0] - 'a'] = newNode(word, child_node);
}
return true;
}
/**
 * Reports how many words the dictionary holds, as counted in
 * dictionary_size; the result is 0 while that counter is still 0.
 */
unsigned int size(void)
{
    // a zero counter already yields 0, so no separate branch is needed
    return (unsigned int) dictionary_size;
}
/**
 * Recursively releases a trie node together with every node below it.
 * Passing NULL is allowed and does nothing.
 */
void free_node(node *next)
{
    // empty slot (or an empty trie): nothing to release
    if (next == NULL)
    {
        return;
    }

    // visit every slot, including the last one (index 26, the apostrophe)
    size_t slots = sizeof next->children / sizeof next->children[0];
    for (size_t i = 0; i < slots; i++)
    {
        free_node(next->children[i]);
    }

    // children are gone; release this node itself
    free(next);
}
/**
 * Unloads dictionary from memory. Returns true if successful else false.
 *
 * Frees every node reachable from root and then clears root, so that a
 * repeated unload() or a later check() cannot touch freed memory.
 */
bool unload(void)
{
    free_node(root);

    // do not leave a dangling pointer behind
    root = NULL;
    return true;
}
定义节点结构:
// One node of the dictionary trie.
typedef struct node
{
// true when the path leading to this node spells a complete word
bool is_word;
// one slot per supported character: indices 0..25 are used for 'a'..'z'
// and index 26 for the apostrophe
struct node *children[27];
}
node;
// Top of the trie; load() allocates it and unload() frees it.
node *root;
【问题讨论】:
-
在这个问题的上一个版本(previous incarnation of this question,需要 10K 声望才能查看)中,我曾指出:“我很确定,如果你的文件包含 'aardvark' 和 'abelone',那么在添加 'abelone' 时,你会丢弃(泄漏)有关 'aardvark' 的信息。”我指的是
return next->children[word[0] - 97] = newNode(word, new_node); 这一行:它用你刚为 'abelone' 创建的内容,完全替换了之前为 'aardvark' 创建的内容。你必须用完全不同的方式来处理。另外,那个 97 应该写成 'a'。 -
你可以看看Adding word to trie structure dictionary。我敢肯定还有其他相关的问题。这是 CS50 的吗?