【问题标题】:Is the FILE struct consistent between operating systems and architectures?(FILE 结构在操作系统和体系结构之间是否一致?)
【发布时间】:2020-05-19 00:07:48
【问题描述】:

对于下面的 C 代码片段(snippet),LLVM 会生成紧随其后的 IR。

#include <stdio.h>
#include <stdlib.h>

/* Minimal program used to obtain the IR shown after it: one formatted-output
 * call followed by one call whose parameter type is FILE *. */
int main(){
  printf("Hello world\n");
  /* A null stream argument asks fflush to flush every open output stream. */
  fflush(NULL);
  return 0;
}
; ModuleID = 'a.c'
source_filename = "a.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"

; Layout of FILE for this target triple. The 20 elements line up one-to-one with the
; macOS `struct __sFILE` declaration quoted later on this page:
; _p, _r, _w, _flags, _file, _bf, _lbfsize, _cookie, _close, _read, _seek, _write,
; _ub, _extra, _ur, _ubuf[3], _nbuf[1], _lb, _blksize, _offset.
%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
; No body is given for this type in the module; it is only referenced through a pointer.
%struct.__sFILEX = type opaque
; Two-element aggregate: a byte pointer and a 32-bit integer.
%struct.__sbuf = type { i8*, i32 }

@str = private unnamed_addr constant [12 x i8] c"Hello world\00", align 1

; Function Attrs: nounwind ssp uwtable
; main: the source-level printf("Hello world\n") appears here as a call to puts on @str,
; whose 12 bytes hold the text and the terminating NUL but no trailing newline.
define i32 @main() local_unnamed_addr #0 {
  %1 = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i64 0, i64 0))
  ; fflush(NULL): the null argument is typed %struct.__sFILE*. This call and the fflush
  ; declaration are the only places in the module that reference the FILE struct type.
  %2 = tail call i32 @fflush(%struct.__sFILE* null)
  ret i32 0
}

; Function Attrs: nounwind
declare i32 @fflush(%struct.__sFILE* nocapture) local_unnamed_addr #1

; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly) local_unnamed_addr #2

attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 2, !"SDK Version", [3 x i32] [i32 10, i32 15, i32 4]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 7, !"PIC Level", i32 2}
!3 = !{!"Apple clang version 11.0.3 (clang-1103.0.32.59)"}

我可以认为对应的 FILE 结构的结构声明在不同的操作系统之间是一致的吗?有没有一种编程方式来获取这个结构的 LLVM 表示?

【问题讨论】:

  • "我可以认为对应的 FILE 结构的结构声明在不同的操作系统之间是一致的吗?"不可以。

标签: c llvm llvm-ir


【解决方案1】:

FILE * 背后的类型结构对您(作为用户)来说是不透明的,并且在不同的操作系统之间各不相同。

更正式地说,它取决于您所使用的 C(或 C++)编译器及其支持库;不过通常每种硬件与操作系统的组合只对应一个库,可能的例外是同一系统同时支持 32 位和 64 位代码的情况。

一些程序(例如 Perl)曾准备在配置阶段探查不同机器上该结构的内部,但它们是凭借内部知识才这样做的。并不存在标准的结构,除非像 GNU C 库这样的库在它所支持的各种机器上对其做了统一。不过,最安全的做法是假设它在每个地方都不一样。

为了反驳 Asadefa 在其回答中提出的乐观观点,下面是 AIX 7.2 中的结构定义:

/* AIX 7.2 FILE, as quoted: the preprocessor selects one of two unrelated
 * layouts depending on whether the build is 64-bit or 32-bit. */
#if defined(__64BIT__) || defined(__ia64)
/* 64-bit layout: all pointer members first, then the int members, then the shorts. */
typedef struct {
    unsigned char   *_ptr;
    unsigned char   *_base;
    unsigned char   *_bufendp;
    char    *__newbase;
    void    *_lock;
    int _cnt;
    int _file;      /* int here; a short in the 32-bit layout */
    int __stdioid;
    short   _flag;
    short   _unused;
    long    _unused1[4];    /* only this layout has _unused/_unused1 members */
} FILE;
#else /* 32-bit POWER */
/* 32-bit layout: _cnt directly follows _ptr, and the pointers are split
 * around the flag/descriptor fields instead of being grouped together. */
typedef struct {
    unsigned char   *_ptr;
    int _cnt;
    unsigned char   *_base;
    unsigned char   *_bufendp;
    short   _flag;
    short   _file;  /* short here; an int in the 64-bit layout */
    int __stdioid;
    char    *__newbase;
    void    *_lock;
} FILE;
#endif /* __64BIT__ || __ia64 */

这在单个操作系统上的 32 位和 64 位之间有所不同,并且与 Linux 和 macOS 上的结构完全无关。

Solaris 10 上的结构又不一样——它使用了位域,而其他一些系统并没有使用:

/* Solaris 10 FILE structure, as quoted. Unlike the other layouts on this page
 * it packs several stream properties into bit-fields. */
struct __FILE_TAG   /* needs to be binary-compatible with old versions */
{
/* _STDIO_REVERSE only swaps the order of the first two members. */
#ifdef _STDIO_REVERSE
    unsigned char   *_ptr;  /* next character from/to here in buffer */
    int     _cnt;   /* number of available characters in buffer */
#else
    int     _cnt;   /* number of available characters in buffer */
    unsigned char   *_ptr;  /* next character from/to here in buffer */
#endif
    unsigned char   *_base; /* the buffer */
    unsigned char   _flag;  /* the state of the stream */
    unsigned char   _file; /* Old home of the file descriptor */
                /* Only fileno(3C) can retrieve the value now */
    unsigned    __orientation:2; /* the orientation of the stream */
    unsigned    __ionolock:1;   /* turn off implicit locking */
    unsigned    __seekable:1;   /* is file seekable? */
    unsigned    __extendedfd:1; /* enable extended FILE */
    unsigned    __xf_nocheck:1; /* no extended FILE runtime check */
    /* Unnamed-purpose filler: the bit-field widths sum to 2+1+1+1+1+10 = 16 bits. */
    unsigned    __filler:10;
};

而我在HP-UX 11.31上找到的版本又不一样了:

   /* HP-UX 11.31 FILE, as quoted: the count comes first, followed by two buffer
    * pointers, and the file descriptor is stored as two separate bytes. */
   typedef struct {
    int      __cnt;
    unsigned char   *__ptr;
    unsigned char   *__base;
    unsigned short   __flag;
    unsigned char    __fileL;       /* low byte of file desc */
    unsigned char    __fileH;       /* high byte of file desc */
   } FILE;

当然,如果 Solaris、HP-UX、AIX 都没有出现在您的雷达屏幕上,您可能会得出不同的结论,但不同系统之间肯定存在巨大差异。

【讨论】:

    【解决方案2】:

    这是 macOS 实现:

    /* FILE as quoted from the macOS stdio.h. The member order matches, field for
     * field, the %struct.__sFILE type in the IR at the top of this page. */
    typedef struct __sFILE {
        unsigned char *_p;  /* current position in (some) buffer */
        int _r;     /* read space left for getc() */
        int _w;     /* write space left for putc() */
        short   _flags;     /* flags, below; this FILE is free if 0 */
        short   _file;      /* fileno, if Unix descriptor, else -1 */
        struct  __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
        int _lbfsize;   /* 0 or -_bf._size, for inline putc */
    
        /* operations */
        /* _Nullable is a Clang nullability qualifier: each callback pointer may be null. */
        void    *_cookie;   /* cookie passed to io functions */
        int (* _Nullable _close)(void *);
        int (* _Nullable _read) (void *, char *, int);
        fpos_t  (* _Nullable _seek) (void *, fpos_t, int);
        int (* _Nullable _write)(void *, const char *, int);
    
        /* separate buffer for long sequences of ungetc() */
        struct  __sbuf _ub; /* ungetc buffer */
        struct __sFILEX *_extra; /* additions to FILE to not break ABI */
        int _ur;        /* saved _r when _r is counting ungetc data */
    
        /* tricks to meet minimum requirements even when malloc() fails */
        unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
        unsigned char _nbuf[1]; /* guarantee a getc() buffer */
    
        /* separate buffer for fgetln() when line crosses buffer boundary */
        struct  __sbuf _lb; /* buffer for fgetln() */
    
        /* Unix stdio files get aligned to block boundaries on fseek() */
        int _blksize;   /* stat.st_blksize (may be != _bf._size) */
        fpos_t  _offset;    /* current lseek offset (see WARNING) */
    } FILE;
    

    这是 Linux 实现:

    /* FILE as quoted by the answer for "Linux".
     * NOTE(review): the __sFILE tag and the _ext/_up members look like a BSD-derived
     * libc (e.g. Android bionic / OpenBSD), not glibc, whose FILE is struct _IO_FILE.
     * Confirm which libc's stdio.h this was copied from.
     * It differs from the macOS declaration in the ungetc section: `_ext` and `_up`
     * here take the places of `_ub` and `_extra` there. */
    typedef struct __sFILE {
        unsigned char *_p;  /* current position in (some) buffer */
        int _r;     /* read space left for getc() */
        int _w;     /* write space left for putc() */
        short   _flags;     /* flags, below; this FILE is free if 0 */
        short   _file;      /* fileno, if Unix descriptor, else -1 */
        struct  __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
        int _lbfsize;   /* 0 or -_bf._size, for inline putc */
    
        /* operations */
        void    *_cookie;   /* cookie passed to io functions */
        int (*_close)(void *);
        int (*_read)(void *, char *, int);
        fpos_t  (*_seek)(void *, fpos_t, int);
        int (*_write)(void *, const char *, int);
    
        /* extension data, to avoid further ABI breakage */
        struct  __sbuf _ext;
        /* data for long sequences of ungetc() */
        unsigned char *_up; /* saved _p when _p is doing ungetc data */
        int _ur;        /* saved _r when _r is counting ungetc data */
    
        /* tricks to meet minimum requirements even when malloc() fails */
        unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
        unsigned char _nbuf[1]; /* guarantee a getc() buffer */
    
        /* separate buffer for fgetln() when line crosses buffer boundary */
        struct  __sbuf _lb; /* buffer for fgetln() */
    
        /* Unix stdio files get aligned to block boundaries on fseek() */
        int _blksize;   /* stat.st_blksize (may be != _bf._size) */
        fpos_t  _offset;    /* current lseek offset */
    } FILE;
    

    _Nullable 是 macOS 的特性,我猜可以忽略。无论有没有 _Nullable,代码的工作方式都是一样的,所以两者在其他方面看起来是相同的。除了 Linux 上的 unsigned char *_up; 与 macOS 上的 struct __sFILEX *_extra; 这一处差异之外,FILE 结构似乎在 Linux 和 Mac 平台上是一致的。

    【讨论】:

    • 感谢您给出的两个定义。Linux 和 macOS 上的 FILE 之间似乎存在细微差别:第三组字段中的第二个字段在 Mac 上是 struct __sFILEX *_extra,在 Linux 上是 unsigned char *_up。
    • @Guilherme 哎呀,我会改变它
    • 没有“macOS 实现”或“Linux 实现”之类的东西。 FILE 结构的定义是每个编译器和 C 库实现的内部。它唯一允许(和可移植)的使用是通过 FILE * 不透明指针。
    • @dxiv 这些我是从对应平台的stdio.h得到的。
    • macOS 实现中的 _extra 和 Linux 实现中的 _up 之间的区别是根本性的——尽管它们在结构中占据相同的位置,并且大小可能相同。_ub 与 _ext 之间的差异可能很重要,也可能不重要;这取决于两个系统上非标准的 struct __sbuf 结构的细节。
    猜你喜欢
    • 1970-01-01
    • 2018-07-09
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多