【发布时间】:2016-08-31 16:27:08
【问题描述】:
我正在尝试在我的 Ubuntu x86_64 上的分叉进程之间共享一个文本文件:该文件不会太大,因为只有在文件中没有另一个相同的字符串时才会写入字符串;字符串将是访问网站的主机名,因此我假设每个主机名不超过 255 个字节。
当轮到进程写入共享对象时,可以;一旦所有进程都写入共享对象,msync 应该使写入在磁盘上生效,但是创建的mapped.txt 文件只包含来自arrayString 的一个字符串,即最后一个进程写入共享对象的字符串。
代码如下:
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <semaphore.h>
#include <string.h>
// first forked process will write "first" in file, and so on
const char *arrayString[] = {
"first",
"second",
"third"
};
int main(void) {
int index;
int children = 3;
const char *filepath = "mapped.txt";
sem_t *sem;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
char *data;
data = (char *)mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("mmap:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
if (fork() == 0) {
sem_wait(sem);
size_t textsize = strlen(arrayString[index])+1;
if (ftruncate(fd, sizeof(textsize)) == -1) {
perror("ftruncate:");
return EXIT_FAILURE;
}
for (size_t i = 0; i < textsize; i++) {
printf("%d Writing character %c at %zu\n", getpid(), arrayString[index][i], i);
data[i] = arrayString[index][i];
}
printf("%d wrote ", getpid());
for (size_t i = 0; i < textsize; i++) {
printf("%c", data[i]);
}
printf("\n");
if (msync(data, textsize, MS_SYNC) == -1) {
perror("Could not sync the file to disk");
}
sem_post(sem);
_exit(EXIT_SUCCESS);
}
}
close(fd);
return EXIT_SUCCESS;
}
这是三个子进程的上述代码的一种可能输出(这很好):
20373 Writing character s at 0
20373 Writing character e at 1
20373 Writing character c at 2
20373 Writing character o at 3
20373 Writing character n at 4
20373 Writing character d at 5
20373 Writing character at 6
20373 wrote second
20374 Writing character t at 0
20374 Writing character h at 1
20374 Writing character i at 2
20374 Writing character r at 3
20374 Writing character d at 4
20374 Writing character at 5
20374 wrote third
20372 Writing character f at 0
20372 Writing character i at 1
20372 Writing character r at 2
20372 Writing character s at 3
20372 Writing character t at 4
20372 Writing character at 5
20372 wrote first
这是mapped.txt的内容(这很糟糕):
first^@^@^@
我预计:
second
third
first
但我得到的只是最后一个进程的字符串,带有那些奇怪的符号。我想将此文件持久保存在内存中,但由于 I/O 缓慢,我正在尝试使用内存映射。 知道为什么我的文件只包含访问共享文件的最后一个进程写入的字符串吗?
编辑:我想我明白了,它现在似乎工作了:我希望它对某人有所帮助。使用g++ -g -o mapthis mapthis.cpp -lrt -pthread 编译。请注意缺少一些错误检查,例如 fsync、snprintf 和 lseek。
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <semaphore.h>
#include <time.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
const char *arrayString[] = {
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com",
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com",
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com"
};
int main(void) {
int index;
int children = sizeof(arrayString) / sizeof(char*);;
const char *filepath = "mapped.txt";
sem_t *sem;
char *data;
struct stat filestats;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
if (fstat(fd, &filestats) < 0) {
close(fd);
perror("fstat:");
return EXIT_FAILURE;
}
data = (char *)mmap(NULL, filestats.st_size ? filestats.st_size : 1, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("first map:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
sleep(1);
pid_t pid = fork();
if (pid == 0) {
int nw = 0;
int hostnameSize = 0;
const size_t origsize = filestats.st_size;
char *hostPos = NULL;
char *numPos = NULL;
char *backslashPos = NULL;
char tempBuff[64];
memset((char *)tempBuff, 0, sizeof(tempBuff));
sem_wait(sem);
// remap to current file size if it changed
fstat(fd, &filestats);
// file empty, just insert
if (filestats.st_size == 0) {
nw = snprintf(tempBuff, sizeof(tempBuff), "%s %010lu\n", arrayString[index], (unsigned long)time(NULL));
write(fd, tempBuff, nw);
fsync(fd);
}
else {
// file not empty, let's look for string
hostPos = strstr(data, arrayString[index]);
if (hostPos) {
// string is already inserted, search for offset of number of seconds
lseek(fd, hostPos-data, SEEK_SET);
numPos = strchr(hostPos, ' ')+1;
backslashPos = strchr(numPos, '\n');
long unsigned before = atoi(numPos);
long unsigned now = (unsigned long)time(NULL);
long unsigned difference = now - before;
printf("%s visited %ld seconds ago (%ld - %ld)\n",
arrayString[index], difference, now, before);
nw = snprintf(tempBuff, backslashPos-hostPos+1, "%s %010lu", arrayString[index], now);
write(fd, tempBuff, nw);
write(fd, "\n", 1);
fsync(fd);
}
else {
data = (char *)mremap(data, origsize, filestats.st_size, MREMAP_MAYMOVE);
if (data == MAP_FAILED) {
close(fd);
sem_post(sem);
perror("mmap:");
_exit(EXIT_FAILURE);
}
lseek(fd, 0, SEEK_END);
nw = snprintf(tempBuff, sizeof(tempBuff), "%s %010lu\n", arrayString[index], (unsigned long)time(NULL));
write(fd, tempBuff, nw);
fsync(fd);
}
}
munmap(data, filestats.st_size);
close(fd);
sem_post(sem);
_exit(EXIT_SUCCESS);
}
else if (pid > 0) {
wait(NULL);
}
}
munmap(data, filestats.st_size);
close(fd);
return EXIT_SUCCESS;
}
【问题讨论】: