【发布时间】:2017-03-07 04:49:25
【问题描述】:
我正在 Linux Ubuntu 16.04 上使用 C++ 编写文件爬虫。
基本上,它应该遍历一个目录,把每个文件的字节大小记录到链表中,并按需创建新节点。因此,如果我有一个只包含多个文件的文件夹,一切都正常。出现的错误如下:
程序收到信号SIGSEGV,分段错误。 __strcpy_sse2_unaligned () 在 ../sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S:714 714 ../sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: 没有这样的文件或目录。
根据stack overflow post 1,我需要以某种方式安装一个库,但安装正确版本时存在问题。
我不认为这是我的问题所在,因为根据调试器,错误发生在递归函数内部的调用处——该函数本应深入子目录并获取数据。我的主循环中也调用了相同的函数来遍历顶层目录,只有一层目录时它工作正常;一旦存在嵌套目录,就会出问题。下面附上完整的源代码供参考,代码有点长,但只要用相同的参数调用,这个错误应该很容易复现:
// Directory crawler
// Written by Kaz
/*
1) Start at a user provided directory
2) Descend the file tree while tracking each file
3) Group each file by its size, using the bin width given as a user argument
4) Print a histogram of file sizes in bin-wide groupings
*/
#include <dirent.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <iostream>
#include <string>
using namespace std;
// Running count of histogram bins. node's constructor and nextNode() copy the
// current value into a new bin's `name`; the code that creates a bin increments
// it afterwards.
int binCount = 0;
// One histogram bin, kept as an element of a doubly linked list.
struct node{
    node *next, *prev;          // neighbouring bins; NULL when there is none
    int count, name, min, max;  // tally, bin number, inclusive [min, max] range

    // Builds an unlinked, empty bin covering [0, 0] whose number is the current
    // value of the global binCount.
    node()
        : next(NULL), prev(NULL), count(0), name(binCount), min(0), max(0)
    {
    }
};
// Allocates a new bin and links it directly after previousNode.
//
// The new bin is numbered with the current binCount, starts with a count of 1,
// and covers [previousNode->max + 1, previousNode->max * 2 + 1].
// previousNode must not be NULL. Returns the new bin; it is allocated with
// `new` and this function never releases it.
node *nextNode(node* previousNode){
    node *appended = new node;
    const int previousMax = previousNode->max;

    // Payload of the new bin.
    appended->name  = binCount;
    appended->count = 1;
    appended->min   = previousMax + 1;
    appended->max   = (previousMax * 2) + 1;

    // Wire the new bin in as the successor of previousNode.
    appended->prev = previousNode;
    appended->next = NULL;
    previousNode->next = appended;

    return appended;
}
void traverseNewDirectory(node * here, char *dirName){
DIR * nwd;
struct dirent *dip;
node * current;
current = here;
bool isadirectory,isHidden;
if((nwd = opendir(dirName))== NULL){
perror("Can't open derived directory");
return;
}
while ((dip = readdir(nwd)) != NULL){
isadirectory = false;
isHidden = false;
if((dip -> d_type) == DT_UNKNOWN ){
struct stat stbuf;
stat(dip->d_name, &stbuf);
isadirectory = S_ISDIR(stbuf.st_mode);
}
else if((dip -> d_type) == DT_DIR ){
if((strcmp(dip->d_name, ".") == 0) || (strcmp(dip->d_name, "..")) == 0){
isHidden = true;
isadirectory = true;
}
else{
isadirectory = true;
}
}
else{
if((dip-> d_reclen <= current->max)&&(dip->d_reclen >=current->min)){
current->count = current->count+1;
}
else if(dip->d_reclen < current->min){
node*temp = current->prev;
while(temp->prev != NULL){
if((dip-> d_reclen <= current->max)&&(dip->d_reclen >=current->min)){
current->count = current->count+1;
break;
}
else if(dip->d_reclen < current->min){
temp = current->prev;
}
}
}
else{
current -> next = nextNode(current);
current = current -> next;
binCount++;
}
}
if(isadirectory){
char *path;
strcpy(path,dirName);
strcat(path, "/");
strcat(path,dip->d_name);
strcat(path, "\0");
if(isHidden == true){
}
else{
traverseNewDirectory(current, path);
}
}
}
while ( ( closedir (nwd) == -1) && ( errno == EINTR) );
}
// Writes the histogram to standard output, one line per bin, starting at head
// and following the next links: the bin's number and range, then one 'x' per
// counted entry.
void printHistogram(node *head){
    for (node *bin = head; bin != NULL; bin = bin->next){
        cout << "[B " << bin->name << "] from " << bin->min << " to " << bin->max << " : ";
        for (int remaining = bin->count; remaining > 0; --remaining){
            cout << "x";
        }
        cout << endl;
    }
}
int main(int argc,char *argv[]){
// Ensures that a valid directory is provided by the cmd line argument
if (argc != 3){
if(argc == 1){
fprintf (stderr, " argc = %d no directory given \n", argc);
return 1;
}
else if(argc == 2){
fprintf (stderr, " argc = %d no size given \n", argc);
return 2;
}
else{
fprintf(stderr, "argc = %d invalid parameters \n", argc);
return 3;
}
}
DIR * cwd; // current working directory pointer
struct dirent *cwdP; // pointer to dirent struct
int binWidth; // variable for the width of the grouping in the histogram
binWidth = atoi(argv[2]);
node *first = new node;
binCount++;
first->max = binWidth - 1;
node * current;
current = first;
bool isadirectory,isHidden;
if((cwd = opendir(argv[1]))== NULL){
perror("Can't open main directory");
return 2;
}
while ((cwdP = readdir(cwd)) != NULL){
isadirectory = false;
isHidden = false;
if((cwdP -> d_type) == DT_UNKNOWN ){
struct stat stbuf;
stat(cwdP->d_name, &stbuf);
isadirectory = S_ISDIR(stbuf.st_mode);
}
else if((cwdP -> d_type) == DT_DIR ){
if((strcmp(cwdP->d_name, ".") == 0) || (strcmp(cwdP->d_name, "..")) == 0){
isHidden = true;
isadirectory = true;
}
else{
isadirectory = true;
}
}
else{
if((cwdP-> d_reclen <= current->max)&&(cwdP->d_reclen >=current->min)){
current->count = current->count+1;
}
else if(cwdP->d_reclen < current->min){
node*temp = current->prev;
while(temp->prev != NULL){
if((cwdP-> d_reclen <= current->max)&&(cwdP->d_reclen >=current->min)){
current->count = current->count+1;
break;
}
else if(cwdP->d_reclen < current->min){
temp = current->prev;
}
}
}
else{
current -> next = nextNode(current);
current = current -> next;
binCount++;
}
}
if(isadirectory){
char *fullPath;
strcpy(fullPath,argv[1]);
strcat(fullPath,"/");
strcat(fullPath,cwdP->d_name);
strcat(fullPath, "\0");
if(isHidden == true){
}
else{
traverseNewDirectory(current, fullPath);
}
}
}
while ( ( closedir (cwd) == -1) && ( errno == EINTR) );
printHistogram(first);
return 0;
}
【问题讨论】:
-
char *path; strcat(path, "/");你希望它做什么?path未初始化,里面是垃圾值,所以你是在向一个随机的内存位置写入。在尝试写入之前,path必须指向一块有效的内存缓冲区。 -
同样的事情。
strcpy不会为您分配内存。它希望你已经做到了。 -
@KazRodgers 嗯?现在我只需要将我的评论更改为:
char *path; strcpy(path,dirName);你希望它做什么?path未初始化,里面是垃圾值,所以你是在向一个随机的内存位置写入。在尝试写入之前,path必须指向一块有效的内存缓冲区。 -
这属于未定义的行为类别。您的代码有时似乎可以工作......直到突然之间它不起作用。当它没有时,你通常认为自己很幸运,因为这样你就能发现你做错了什么。
-
“当我在 main 中做的时候没有这个问题?”。如果它是相同的代码,那么它有同样的问题。只是症状可能有所不同。这就是未定义行为的含义。我不能确切地告诉你如何初始化,因为我真的不想阅读你所有的代码来理解你打算用
path做什么。但是像这样:char path[MAX_PATH_LEN]或char *path = (char *) malloc(MAX_PATH_LEN)。在后一种情况下不要忘记释放内存。
标签: c++ linux segmentation-fault