Stevens (et al) UNIX® Network Programming, Vol 1: The Sockets Networking API 在第 15 章Unix 域协议,特别是第 15.7 节传递描述符中描述了在进程之间传输文件描述符的过程。完整描述很麻烦,但必须在 Unix 域套接字上完成(AF_UNIX 或 AF_LOCAL),并且发送方进程使用sendmsg(),而接收方使用recvmsg()。
我把问题中的代码稍作修改(并加入了调试输出),得到下面这个版本;它在 Mac OS X 10.10.1 Yosemite、GCC 4.9.1 上可以正常运行:
#include "stderr.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
/*
 * wyslij - pass the open file descriptor `fd` to the peer of `socket`.
 *
 * One message is sent with sendmsg(): three bytes of ordinary payload
 * ("ABC") plus a single SOL_SOCKET/SCM_RIGHTS control message whose data
 * is the descriptor.  A sendmsg() failure is reported via err_syserr().
 */
static void wyslij(int socket, int fd) // send fd by socket
{
    /* Ancillary buffer sized for exactly one control message holding one int. */
    char control[CMSG_SPACE(sizeof(fd))];
    memset(control, '\0', sizeof(control));

    /* Ordinary data that accompanies the descriptor. */
    struct iovec payload = { .iov_base = "ABC", .iov_len = 3 };

    struct msghdr msg = {
        .msg_iov = &payload,
        .msg_iovlen = 1,
        .msg_control = control,
        .msg_controllen = sizeof(control),
    };

    struct cmsghdr *hdr = CMSG_FIRSTHDR(&msg);
    hdr->cmsg_level = SOL_SOCKET;
    hdr->cmsg_type = SCM_RIGHTS;
    hdr->cmsg_len = CMSG_LEN(sizeof(fd));

    /*
     * The descriptor is stored directly through an int pointer; this
     * relies on CMSG_DATA() yielding an address suitably aligned for int.
     */
    int *slot = (int *) CMSG_DATA(hdr);
    *slot = fd;

    /* Total length of all control messages placed in the buffer. */
    msg.msg_controllen = CMSG_SPACE(sizeof(fd));

    if (sendmsg(socket, &msg, 0) < 0)
        err_syserr("Failed to send message\n");
}
/*
 * odbierz - receive one message from `socket` and return the file
 * descriptor carried in its first control message.
 *
 * Up to 256 bytes of ordinary payload are accepted and discarded.  A
 * recvmsg() failure is reported via err_syserr().  The first control
 * message is not validated: its data is read directly as an int.
 */
static int odbierz(int socket) // receive fd from socket
{
    char payload[256];
    char control[256];

    struct iovec vec = { .iov_base = payload, .iov_len = sizeof(payload) };
    struct msghdr msg = {
        .msg_iov = &vec,
        .msg_iovlen = 1,
        .msg_control = control,
        .msg_controllen = sizeof(control),
    };

    if (recvmsg(socket, &msg, 0) < 0)
        err_syserr("Failed to receive message\n");

    struct cmsghdr *hdr = CMSG_FIRSTHDR(&msg);
    unsigned char *raw = CMSG_DATA(hdr);

    err_remark("About to extract fd\n");
    /* Direct int load: relies on `raw` being suitably aligned for int. */
    int received = *((int *) raw);
    err_remark("Extracted fd %d\n", received);

    return received;
}
/*
 * Demonstration driver for descriptor passing.
 *
 * Creates an AF_UNIX datagram socket pair and forks.  The parent opens
 * the file named by argv[1] (default "./z7.c") and sends the descriptor
 * to the child with wyslij(); the child receives it with odbierz() and
 * copies the file's contents to standard output.
 *
 * Returns 0 on success; returns EXIT_FAILURE if fork(), reading from the
 * received descriptor, or writing to standard output fails.
 */
int main(int argc, char **argv)
{
    const char *filename = "./z7.c";

    err_setarg0(argv[0]);
    err_setlogopts(ERR_PID);
    if (argc > 1)
        filename = argv[1];

    int sv[2];
    if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) != 0)
        err_syserr("Failed to create Unix-domain socket pair\n");

    pid_t pid = fork();
    if (pid < 0)
    {
        /* Without this check a failed fork() would run the child branch. */
        err_syserr("Failed to fork\n");
        return EXIT_FAILURE;
    }

    if (pid > 0) // in parent
    {
        err_remark("Parent at work\n");
        close(sv[1]);
        int sock = sv[0];
        int fd = open(filename, O_RDONLY);
        if (fd < 0)
            err_syserr("Failed to open file %s for reading\n", filename);
        wyslij(sock, fd);
        close(fd);
        /* Sleep 1.5 s so the child's output finishes before the parent exits. */
        nanosleep(&(struct timespec){ .tv_sec = 1, .tv_nsec = 500000000}, 0);
        err_remark("Parent exits\n");
    }
    else // in child
    {
        err_remark("Child at play\n");
        close(sv[0]);
        int sock = sv[1];
        /* Sleep 0.5 s before receiving. */
        nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000}, 0);
        int fd = odbierz(sock);
        printf("Read %d!\n", fd);
        /* Flush stdio before raw write() calls so output stays in order
         * even when standard output is not a terminal. */
        fflush(stdout);

        char buffer[256];
        ssize_t nbytes;
        while ((nbytes = read(fd, buffer, sizeof(buffer))) > 0)
        {
            /* write() may be partial; loop until the chunk is out. */
            ssize_t done = 0;
            while (done < nbytes)
            {
                ssize_t nw = write(STDOUT_FILENO, buffer + done,
                                   (size_t)(nbytes - done));
                if (nw < 0)
                {
                    err_syserr("Failed to write to standard output\n");
                    return EXIT_FAILURE;
                }
                done += nw;
            }
        }
        if (nbytes < 0)
        {
            err_syserr("Failed to read from file descriptor %d\n", fd);
            return EXIT_FAILURE;
        }
        printf("Done!\n");
        close(fd);
    }
    return 0;
}
加入了调试输出、但尚未修复的原始代码的输出是:
$ ./fd-passing
fd-passing: pid=1391: Parent at work
fd-passing: pid=1391: Failed to send message
error (40) Message too long
fd-passing: pid=1392: Child at play
$ fd-passing: pid=1392: Failed to receive message
error (40) Message too long
注意,父进程先于子进程结束,所以 shell 提示符出现在输出中间。
“修复后”代码的输出是:
$ ./fd-passing
fd-passing: pid=1046: Parent at work
fd-passing: pid=1048: Child at play
fd-passing: pid=1048: About to extract fd
fd-passing: pid=1048: Extracted fd 3
Read 3!
This is the file z7.c.
It isn't very interesting.
It isn't even C code.
But it is used by the fd-passing program to demonstrate that file
descriptors can indeed be passed between sockets on occasion.
Done!
fd-passing: pid=1046: Parent exits
$
主要的重大变化是:在两个函数中都为 struct msghdr 添加了 struct iovec 数据,并在接收函数(odbierz())中为控制消息提供了缓冲区空间。我没有在这里展示调试过程中的一个中间步骤:当时我只给父进程加上了 struct iovec,父进程的“消息太长”(Message too long)错误随之消失。为了证明它确实有效(文件描述符确实被传递了),我添加了代码,从传递过来的文件描述符中读取文件并打印出来。原始代码中有 sleep(0.5),但由于 sleep() 的参数是无符号整数,0.5 会被截断为 0,这相当于根本不休眠。我使用 C99 复合字面量配合 nanosleep() 让子进程休眠 0.5 秒。父进程休眠 1.5 秒,以便子进程的输出在父进程退出之前完成。我也可以使用 wait() 或 waitpid(),但我懒得这样做。
我还没有回去检查是否所有的添加都是必要的。
"stderr.h" 标头声明了err_*() 函数。这是我编写的代码(1987 年之前的第一个版本),用于简洁地报告错误。 err_setlogopts(ERR_PID) 调用为所有带有 PID 的消息添加前缀。对于时间戳,err_setlogopts(ERR_PID|ERR_STAMP) 也可以完成这项工作。
对齐问题
Nominal Animal 在comment 中建议:
我可以建议您修改代码以使用memcpy() 复制描述符int 而不是直接访问数据吗?它不一定正确对齐——这就是手册页示例也使用 memcpy() 的原因——并且在许多 Linux 架构中,未对齐的 int 访问会导致问题(直至 SIGBUS 信号终止进程)。
不仅是 Linux 上的架构:SPARC 和 Power 都要求数据对齐,而它们通常分别运行 Solaris 和 AIX。DEC Alpha 过去也有同样的要求,不过如今已经很少能见到这种机器了。
手册页cmsg(3)中与此相关的代码是:
struct msghdr msg = {0};
struct cmsghdr *cmsg;
int myfds[NUM_FD]; /* Contains the file descriptors to pass. */
char buf[CMSG_SPACE(sizeof myfds)]; /* ancillary data buffer */
int *fdptr;
msg.msg_control = buf;
msg.msg_controllen = sizeof buf;
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int) * NUM_FD);
/* Initialize the payload: */
fdptr = (int *) CMSG_DATA(cmsg);
memcpy(fdptr, myfds, NUM_FD * sizeof(int));
/* Sum of the length of all control messages in the buffer: */
msg.msg_controllen = CMSG_SPACE(sizeof(int) * NUM_FD);
对 fdptr 的赋值似乎假定 CMSG_DATA(cmsg) 已充分对齐,可以转换为 int *;而使用 memcpy() 是基于 NUM_FD 不只是 1 的假设。话虽如此,该指针指向的是数组 buf 的内部,而正如 Nominal Animal 所说,buf 未必充分对齐;所以在我看来,fdptr 只是一个多余的中间变量,示例如果写成下面这样会更好:
memcpy(CMSG_DATA(cmsg), myfds, NUM_FD * sizeof(int));
然后接收端的相反过程将是适当的。这个程序只传递一个文件描述符,所以代码可以修改为:
memmove(CMSG_DATA(cmsg), &fd, sizeof(fd)); // Send
memmove(&fd, CMSG_DATA(cmsg), sizeof(fd)); // Receive
我似乎还记得,历史上各种操作系统在“只有辅助数据、没有普通负载数据”的情况下出过问题,通常的规避办法是至少发送一个哑字节(dummy byte);但我找不到可以验证这一点的参考资料,所以也可能是我记错了。
鉴于 Mac OS X(基于 Darwin/BSD)至少需要一个 struct iovec,即使它描述的是零长度消息,我也愿意相信上面显示的代码,其中包括3 字节消息,是朝着正确大方向迈出的一大步。消息可能应该是一个空字节而不是 3 个字母。
我已将代码修改为如下所示。它使用memmove() 将文件描述符复制到cmsg 缓冲区和从缓冲区复制文件描述符。它传输单个消息字节,这是一个空字节。
在把文件描述符传递给子进程之前,它还让父进程先从文件中读取(最多)32 个字节。子进程从父进程停下的位置继续读取。这表明被传递的文件描述符连同文件偏移量一起共享。
接收方应在将cmsg 视为文件描述符传递消息之前对其进行更多验证。
#include "stderr.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
/*
 * wyslij - pass the open file descriptor `fd` to the peer of `socket`.
 *
 * One message is sent with sendmsg(): a single null byte of ordinary
 * payload plus one SOL_SOCKET/SCM_RIGHTS control message whose data is
 * the descriptor.  A sendmsg() failure is reported via err_syserr().
 */
static void wyslij(int socket, int fd) // send fd by socket
{
    /*
     * Ancillary data buffer, wrapped in a union so that it is suitably
     * aligned for the struct cmsghdr written through CMSG_FIRSTHDR().
     */
    union
    {
        char buf[CMSG_SPACE(sizeof(fd))];
        struct cmsghdr align;
    } control;
    memset(&control, '\0', sizeof(control));

    /* On Mac OS X, the struct iovec is needed, even if it points to minimal data */
    struct iovec io = { .iov_base = "", .iov_len = 1 };

    struct msghdr msg = { 0 };
    msg.msg_iov = &io;
    msg.msg_iovlen = 1;
    msg.msg_control = control.buf;
    /* Exactly one control message, so this is also the final total length. */
    msg.msg_controllen = sizeof(control.buf);

    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
    /* Byte-wise copy: no assumption about the alignment of CMSG_DATA(). */
    memmove(CMSG_DATA(cmsg), &fd, sizeof(fd));

    if (sendmsg(socket, &msg, 0) < 0)
        err_syserr("Failed to send message\n");
}
/*
 * odbierz - receive one message from `socket` and return the file
 * descriptor carried in its first control message.
 *
 * One byte of ordinary payload is accepted and discarded.  A recvmsg()
 * failure is reported via err_syserr().  The control data is validated
 * before use: it must not be truncated, and the first control message
 * must exist, be SOL_SOCKET/SCM_RIGHTS, and be long enough to hold one
 * int.  If validation fails, a remark is logged and the process exits
 * with EXIT_FAILURE.
 */
static int odbierz(int socket) // receive fd from socket
{
    /* On Mac OS X, the struct iovec is needed, even if it points to minimal data */
    char m_buffer[1];
    struct iovec io = { .iov_base = m_buffer, .iov_len = sizeof(m_buffer) };

    /*
     * Ancillary data buffer, wrapped in a union so that it is suitably
     * aligned for the struct cmsghdr read through CMSG_FIRSTHDR().
     */
    union
    {
        char buf[256];
        struct cmsghdr align;
    } control;

    struct msghdr msg = {0};
    msg.msg_iov = &io;
    msg.msg_iovlen = 1;
    msg.msg_control = control.buf;
    msg.msg_controllen = sizeof(control.buf);

    if (recvmsg(socket, &msg, 0) < 0)
        err_syserr("Failed to receive message\n");

    if (msg.msg_flags & MSG_CTRUNC)
    {
        err_remark("Control data was truncated\n");
        exit(EXIT_FAILURE);
    }

    /* CMSG_FIRSTHDR() yields NULL when no control message was received. */
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL)
    {
        err_remark("No control message received\n");
        exit(EXIT_FAILURE);
    }
    if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS)
    {
        err_remark("Unexpected control message (level %d, type %d)\n",
                   cmsg->cmsg_level, cmsg->cmsg_type);
        exit(EXIT_FAILURE);
    }
    int fd;
    if (cmsg->cmsg_len < CMSG_LEN(sizeof(fd)))
    {
        err_remark("Control message too short to hold a file descriptor\n");
        exit(EXIT_FAILURE);
    }

    err_remark("About to extract fd\n");
    /* Byte-wise copy: no assumption about the alignment of CMSG_DATA(). */
    memmove(&fd, CMSG_DATA(cmsg), sizeof(fd));
    err_remark("Extracted fd %d\n", fd);
    return fd;
}
/*
 * Demonstration driver for descriptor passing.
 *
 * Creates an AF_UNIX datagram socket pair and forks.  The parent opens
 * the file named by argv[1] (default "./z7.c"), reads up to 32 bytes
 * from it, then sends the descriptor to the child with wyslij().  The
 * child receives it with odbierz() and copies whatever remains of the
 * file to standard output.
 *
 * Returns 0 on success; returns EXIT_FAILURE if fork(), reading from the
 * received descriptor, or writing to standard output fails.
 */
int main(int argc, char **argv)
{
    const char *filename = "./z7.c";

    err_setarg0(argv[0]);
    err_setlogopts(ERR_PID);
    if (argc > 1)
        filename = argv[1];

    int sv[2];
    if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) != 0)
        err_syserr("Failed to create Unix-domain socket pair\n");

    pid_t pid = fork();
    if (pid < 0)
    {
        /* Without this check a failed fork() would run the child branch. */
        err_syserr("Failed to fork\n");
        return EXIT_FAILURE;
    }

    if (pid > 0) // in parent
    {
        err_remark("Parent at work\n");
        close(sv[1]);
        int sock = sv[0];
        int fd = open(filename, O_RDONLY);
        if (fd < 0)
            err_syserr("Failed to open file %s for reading\n", filename);

        /* Read some data to demonstrate that file offset is passed */
        char buffer[32];
        ssize_t nbytes = read(fd, buffer, sizeof(buffer));
        if (nbytes > 0)
            err_remark("Parent read: [[%.*s]]\n", (int)nbytes, buffer);

        wyslij(sock, fd);
        close(fd);
        /* Sleep 1.5 s so the child's output finishes before the parent exits. */
        nanosleep(&(struct timespec){ .tv_sec = 1, .tv_nsec = 500000000}, 0);
        err_remark("Parent exits\n");
    }
    else // in child
    {
        err_remark("Child at play\n");
        close(sv[0]);
        int sock = sv[1];
        /* Sleep 0.5 s before receiving. */
        nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000}, 0);
        int fd = odbierz(sock);
        printf("Read %d!\n", fd);
        /* Flush stdio before raw write() calls so output stays in order
         * even when standard output is not a terminal. */
        fflush(stdout);

        char buffer[256];
        ssize_t nbytes;
        while ((nbytes = read(fd, buffer, sizeof(buffer))) > 0)
        {
            /* write() may be partial; loop until the chunk is out. */
            ssize_t done = 0;
            while (done < nbytes)
            {
                ssize_t nw = write(STDOUT_FILENO, buffer + done,
                                   (size_t)(nbytes - done));
                if (nw < 0)
                {
                    err_syserr("Failed to write to standard output\n");
                    return EXIT_FAILURE;
                }
                done += nw;
            }
        }
        if (nbytes < 0)
        {
            err_syserr("Failed to read from file descriptor %d\n", fd);
            return EXIT_FAILURE;
        }
        printf("Done!\n");
        close(fd);
    }
    return 0;
}
还有一个示例运行:
$ ./fd-passing
fd-passing: pid=8000: Parent at work
fd-passing: pid=8000: Parent read: [[This is the file z7.c.
It isn't ]]
fd-passing: pid=8001: Child at play
fd-passing: pid=8001: About to extract fd
fd-passing: pid=8001: Extracted fd 3
Read 3!
very interesting.
It isn't even C code.
But it is used by the fd-passing program to demonstrate that file
descriptors can indeed be passed between sockets on occasion.
And, with the fully working code, it does indeed seem to work.
Extended testing would have the parent code read part of the file, and
then demonstrate that the child codecontinues where the parent left off.
That has not been coded, though.
Done!
fd-passing: pid=8000: Parent exits
$