【问题标题】:how to escape values for writing a CSV file using C language如何转义值以使用 C 语言编写 CSV 文件
【发布时间】:2014-03-06 10:51:59
【问题描述】:

我正在做一个将数据写入 CSV 文件的项目。 如何转义值以使用 C 语言编写 CSV 文件。

/*
 * Append the fields of one record to the file "result.data".
 *
 * Each field is written with a plain "%s," or "%d," format, so string
 * fields are emitted verbatim: nothing is quoted or escaped, and a value
 * that itself contains a comma or a double quote is written unchanged.
 */
void writeToFile(struct raDataStructure data)
{
    FILE *fp;
    /* "a" opens the file for appending. */
    /* NOTE(review): the result of fopen() is not checked before use. */
    fp = fopen("result.data", "a");
    /* NOTE(review): `long` is a C keyword, so it cannot be a member name. */
    fprintf(fp, "%s,", data.long);
    fprintf(fp, "%s,", data.lat);
    fprintf(fp, "%s,", data.city);
    fprintf(fp, "%d,", data.pobox);
    /* data.bio is the free-text field the question is about. */
    fprintf(fp, "%s,", data.bio);
    /* The last field is also followed by a comma; no newline is written. */
    fprintf(fp, "%d,", data.bNumber);
    fclose(fp);
}

字段 data.bio 可以包含任何字符,包括逗号、引号和斜线。在写入文件之前,我该如何对其进行转义,才能生成有效的 CSV 文件?

【问题讨论】:

  • 通过编写一个将字符串转换为转义字符串的函数。展示你的尝试。
  • fprintf(fp, "\"%s\",", data.bio);
  • @BLUEPIXY: 差不多,但是如果字符串中包含 '"',就必须把它们重复一次,例如 [Hello "World"] 转义后应该是 ["Hello ""World"""]。
  • 有人有为此预建的功能吗?
  • 没有标准函数。

标签: c csv


【解决方案1】:

这应该会给你一个很好的起点。请注意,返回值使用完毕后,您需要对它调用 free()。

/*
 * Build a quoted CSV field from the NUL-terminated string `in`.
 *
 * The result is wrapped in double quotes and every embedded double quote
 * is doubled, e.g.  Hello "World"  becomes  "Hello ""World""".
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * when `in` is NULL, the required size would overflow size_t, or the
 * allocation fails.
 */
char* escapeCSV(char* in) {
  if (in == NULL) {
    return NULL;
  }

  size_t in_len = strlen(in);

  /* Worst case: every character is a quote (2 * len), plus the two
     enclosing quotes and the terminating NUL. Reject lengths for which
     that computation would wrap around. */
  if (in_len > ((size_t)-1 - 3) / 2) {
    return NULL;
  }

  char *out_buf = malloc(in_len * 2 + 3);
  if (out_buf == NULL) {
    return NULL;
  }

  size_t out_idx = 0;
  out_buf[out_idx++] = '"';
  for (size_t in_idx = 0; in_idx < in_len; in_idx++) {
    if (in[in_idx] == '"') {
      /* Emit the extra quote; the character itself follows below. */
      out_buf[out_idx++] = '"';
    }
    out_buf[out_idx++] = in[in_idx];
  }
  out_buf[out_idx++] = '"';
  out_buf[out_idx] = '\0';
  return out_buf;
}

【讨论】:

    【解决方案2】:

    csv.h

    /*
    @(#)File:           $RCSfile: csv.h,v $
    @(#)Version:        $Revision: 2.1 $
    @(#)Last changed:   $Date: 2012/11/01 23:45:19 $
    @(#)Purpose:        Scanner for Comma Separated Variable (CSV) Data
    @(#)Author:         J Leffler
    @(#)Origin:         Kernighan & Pike, 'The Practice of Programming'
    */
    
    #ifndef CSV_H
    #define CSV_H
    
    #include <stdio.h>
    
    /*
     * Reader side.
     *
     * csvgetline reads one line from ifp (terminated by "\n", "\r" or
     * "\r\n"), splits it into fields and returns a pointer to the
     * library's internal line buffer. It returns a null pointer at end of
     * file when no characters were read, or when memory allocation fails.
     */
    extern char  *csvgetline(FILE *ifp);    /* Read next input line */
    /* Returns field n of the most recently split line, or a null pointer
       when n is not less than the number of fields. */
    extern char  *csvgetfield(size_t n);    /* Return field n */
    extern size_t csvnfield(void);          /* Return number of fields */
    /* Frees the reader's buffers and resets its sizes and counts to zero. */
    extern void   csvreset(void);           /* Release space used by CSV */
    
    /*
     * Writer side.
     *
     * csvputfield writes one field to ofp. A field containing a double
     * quote, comma, CR or LF is enclosed in double quotes with embedded
     * quotes doubled; any other field is written unchanged. Returns the
     * result of fputs(), or EOF if the temporary buffer cannot be allocated.
     */
    extern int    csvputfield(FILE *ofp, const char *field);
    /* Writes nfields fields separated by commas, then the end-of-line
       string. Returns EOF if writing a field fails, otherwise the result
       of the final fputs(). */
    extern int    csvputline(FILE *ofp, char **fields, int nfields);
    /* Sets the end-of-line string used by csvputline; strings longer than
       the internal buffer allows are truncated. */
    extern void   csvseteol(const char *eol);
    
    #endif /* CSV_H */
    

    csv.c

    /*
    @(#)File:           $RCSfile: csv.c,v $
    @(#)Version:        $Revision: 2.2 $
    @(#)Last changed:   $Date: 2013/01/28 03:28:55 $
    @(#)Purpose:        Scanner for Comma Separated Variable (CSV) Data
    @(#)Author:         J Leffler
    @(#)Origin:         Kernighan & Pike, 'The Practice of Programming'
    */
    
    /* See RFC 4180 (http://www.ietf.org/rfc/rfc4180.txt) */
    
    #include "csv.h"
    #include <stdlib.h>
    #include <string.h>
    
    /* Value returned by split() when growing the field array fails. */
    enum { NOMEM = -2 };
    
    /* Reader state shared by csvgetline(), split(), csvgetfield(),
       csvnfield() and csvreset(). */
    static char *line = 0;      /* Input line */
    static char *sline = 0;     /* Split line */
    static size_t maxline = 0;  /* Size of line[] and sline[] */
    static char **field = 0;    /* Field pointers */
    static size_t maxfield = 0; /* Size of field[] */
    static size_t nfield = 0;   /* Number of fields */
    
    /* Passed to strcspn() to find the end of a field. */
    static char fieldsep[]= ",";    /* Field separator characters */
    static char fieldquote = '"';   /* Quote character */
    
    /* End-of-line string written by csvputline(); changed via csvseteol(). */
    static char eolstr[8] = "\n";
    
    /*
     * Release every buffer owned by the reader and put its bookkeeping
     * back into the initial "nothing allocated" state.
     */
    void csvreset(void)
    {
        free(field);
        field = 0;

        free(sline);
        sline = 0;

        free(line);
        line = 0;

        nfield = 0;
        maxfield = 0;
        maxline = 0;
    }
    
    /*
     * Report whether c ends a line. '\n' and '\r' both count; after a
     * '\r' one more character is read from ifp so that a following '\n'
     * is swallowed as part of the same line ending. Any other character
     * read that way is pushed back onto the stream.
     *
     * Returns 1 for an end-of-line character, 0 otherwise.
     */
    static int endofline(FILE *ifp, int c)
    {
        if (c == '\n')
            return(1);
        if (c != '\r')
            return(0);

        int next = getc(ifp);
        if (next != EOF && next != '\n')
            ungetc(next, ifp);
        return(1);
    }
    
    /*
     * Process a quoted field in place. split() calls this with p pointing
     * just past the opening quote.
     *
     * The field text is compacted towards the start of p: i is the write
     * index and j the read index. A doubled quote collapses to a single
     * quote; a lone quote ends the quoted part.
     *
     * Returns a pointer to the character that stopped the scan: either a
     * separator from fieldsep or the terminating NUL of the line. The
     * compacted field is NUL-terminated at p[i].
     */
    static char *advquoted(char *p)
    {
        size_t i;
        size_t j;
        for (i = j = 0; p[j] != '\0'; i++, j++)
        {
            /* On a quote, step j to the next character. If that one is not
               a quote too, the quoted part is over. */
            if (p[j] == fieldquote && p[++j] != fieldquote)
            {
                /* Keep whatever follows the closing quote, up to the next
                   separator or the end of the string. */
                size_t k = strcspn(p+j, fieldsep);
                memmove(p+i, p+j, k);
                i += k;
                j += k;
                break;
            }
            /* Ordinary character, or the second quote of a doubled pair. */
            p[i] = p[j];
        }
        p[i] = '\0';
        return(p + j);
    }
    
    /*
     * Split the current input line into fields.
     *
     * The text in line[] is copied to sline[], the separators in the copy
     * are overwritten with NULs, and field[] is filled with pointers into
     * that copy. Quoted fields are unquoted in place by advquoted().
     *
     * Returns the number of fields found (0 for an empty line), or NOMEM
     * if the field array cannot be enlarged.
     */
    static int split(void)
    {
        char *p;
        char *sepp;
        int sepc;
    
        nfield = 0;
        if (line[0] == '\0')
            return(0);
        strcpy(sline, line);
        p = sline;
    
        do
        {
            if (nfield >= maxfield)
            {
                /* Record the new capacity only once realloc has succeeded,
                   so maxfield never claims more room than field[] has. */
                size_t newmax = maxfield * 2;
                char **newf = (char **)realloc(field, newmax * sizeof(field[0]));
                if (newf == 0)
                    return NOMEM;
                field = newf;
                maxfield = newmax;
            }
            if (*p == fieldquote)
                sepp = advquoted(++p);
            else
                sepp = p + strcspn(p, fieldsep);
            sepc = sepp[0];
            sepp[0] = '\0';
            field[nfield++] = p;
            p = sepp + 1;
            /* Continue while the field ended on a separator from fieldsep
               rather than on the end of the line. The NUL test comes
               first because strchr() would also match the terminator. */
        } while (sepc != '\0' && strchr(fieldsep, sepc) != 0);
    
        return((int)nfield);
    }
    
    /*
     * Read one line from ifp and split it into fields.
     *
     * The line ends at "\n", "\r", "\r\n" or end of file. The line and
     * split buffers are allocated on the first call and doubled in size
     * as needed.
     *
     * Returns a pointer to the internal line buffer (without the line
     * terminator), or NULL at end of file when no characters were read.
     * NULL is also returned when memory runs out; in that case all reader
     * state has been released via csvreset().
     */
    char *csvgetline(FILE *ifp)
    {
        size_t i;
        int    c;
    
        if (line == NULL)
        {
            /* Allocate on first call */
            maxline = maxfield = 1;
            line = (char *)malloc(maxline);     /*=C++=*/
            sline = (char *)malloc(maxline);    /*=C++=*/
            field = (char **)malloc(maxfield*sizeof(field[0])); /*=C++=*/
            if (line == NULL || sline == NULL || field == NULL)
            {
                csvreset();
                return(NULL);   /* out of memory */
            }
        }
        for (i = 0; (c = getc(ifp)) != EOF && !endofline(ifp, c); i++)
        {
            if (i >= maxline - 1)
            {
                size_t newmax = maxline * 2;
                char  *newl;
                char  *news = NULL;
    
                /* Store each successful realloc result immediately. After
                   a successful realloc the old pointer is no longer valid,
                   so leaving it in the global would make csvreset() free
                   it a second time and leak the new block. */
                newl = (char *)realloc(line, newmax);   /*=C++=*/
                if (newl != NULL)
                {
                    line = newl;
                    news = (char *)realloc(sline, newmax);  /*=C++=*/
                    if (news != NULL)
                        sline = news;
                }
                if (newl == NULL || news == NULL)
                {
                    csvreset();
                    return(NULL);   /* out of memory */
                }
                maxline = newmax;
            }
            line[i] = c;
        }
        line[i] = '\0';
        if (split() == NOMEM)
        {
            csvreset();
            return(NULL);
        }
        return((c == EOF && i == 0) ? NULL : line);
    }
    
    
    /*
     * Return field number n (counting from 0) of the most recently split
     * line, or a null pointer when n is out of range.
     */
    char *csvgetfield(size_t n)
    {
        return((n < nfield) ? field[n] : 0);
    }
    
    /* Return how many fields the most recently split line contained. */
    size_t csvnfield(void)
    {
        size_t count = nfield;
        return count;
    }
    
    /*
     * Write one CSV field to ofp.
     *
     * A field without a double quote, comma, CR or LF is written as is.
     * Any other field is written enclosed in double quotes, with each
     * embedded double quote doubled.
     *
     * Returns the result of fputs(), or EOF when the temporary buffer for
     * the quoted form cannot be allocated.
     */
    int csvputfield(FILE *ofp, const char *ofield)
    {
        static const char special[] = "\",\r\n";
    
        /* Fast path: nothing in the field needs protecting. */
        if (strpbrk(ofield, special) == 0)
            return fputs(ofield, ofp);
    
        /* Each embedded quote costs one extra byte in the output. */
        size_t nquotes = 0;
        for (const char *q = strchr(ofield, '"'); q != 0; q = strchr(q + 1, '"'))
            nquotes++;
    
        /* Text + doubled quotes + two enclosing quotes + NUL. */
        size_t size = strlen(ofield) + nquotes + 3;
        char *buffer = malloc(size);
        if (buffer == 0)
            return EOF;
    
        char *dst = buffer;
        *dst++ = '"';
        for (const char *src = ofield; *src != '\0'; src++)
        {
            if (*src == '"')
                *dst++ = '"';
            *dst++ = *src;
        }
        *dst++ = '"';
        *dst = '\0';
    
        int status = fputs(buffer, ofp);
        free(buffer);
        return status;
    }
    
    /*
     * Write nfields fields to ofp as one CSV line: the fields are written
     * with csvputfield(), separated by commas, and followed by the current
     * end-of-line string.
     *
     * Returns EOF as soon as writing a separator or a field fails;
     * otherwise returns the result of writing the end-of-line string
     * with fputs().
     */
    int csvputline(FILE *ofp, char **fields, int nfields)
    {
        for (int i = 0; i < nfields; i++)
        {
            /* A failed separator write is reported like a failed field. */
            if (i > 0 && putc(',', ofp) == EOF)
                return EOF;
            if (csvputfield(ofp, fields[i]) == EOF)
                return EOF;
        }
        return(fputs(eolstr, ofp));
    }
    
    /*
     * Set the end-of-line string written by csvputline(). At most
     * sizeof(eolstr) - 1 bytes of eol are kept; anything longer is
     * truncated.
     */
    void csvseteol(const char *eol)
    {
        const size_t capacity = sizeof(eolstr) - 1;
        size_t nbytes = strlen(eol);
    
        if (nbytes > capacity)
            nbytes = capacity;
        memmove(eolstr, eol, nbytes);
        eolstr[nbytes] = '\0';
    }
    
    #ifdef TEST
    
    /*
     * Test driver: reads CSV lines from standard input and, for each
     * line, prints the raw line, every field as parsed and as re-encoded
     * by csvputfield(), and finally the whole line re-encoded by
     * csvputline().
     */
    int main(void)
    {
        char *in_line;
    
        while ((in_line = csvgetline(stdin)) != 0)
        {
            size_t n = csvnfield();
            /* An empty input line has zero fields, and a VLA must have at
               least one element, so reserve one slot in that case. */
            char *fields[n > 0 ? n : 1];    /* C99 VLA */
            printf("line = '%s'\n", in_line);
            for (size_t i = 0; i < n; i++)
            {
                printf("field[%zu] = '%s'\n", i, csvgetfield(i));
                printf("field[%zu] = [", i);
                csvputfield(stdout, csvgetfield(i));
                fputs("]\n", stdout);
                fields[i] = csvgetfield(i);
            }
            /* n - 1 would wrap around for an empty line. */
            if (n > 0)
                printf("fields[0..%zu] = ", n-1);
            else
                printf("fields[] = ");
            csvputline(stdout, fields, (int)n);
        }
    
        return(0);
    }
    
    #endif /* TEST */
    

    【讨论】:

      【解决方案3】:

      使用类似 C 的转义序列进行打印:

      /*
       * Write one character to outf using C-like escape sequences.
       *
       * Characters listed in escapev are written as a backslash followed
       * by the matching character from escapec. Other printable characters
       * are written unchanged. Everything else is written as a backslash
       * and three octal digits.
       *
       * Returns 0 on success and EOF if the write fails.
       */
      int EscapePrint_CSV(FILE *outf, int ch) {
        // Delete or adjust these 2 arrays per code's goals
        // All simple-escape-sequence C11 6.4.4.4 and , for CSV
        static const char *escapev = ",\a\b\t\n\v\f\r\"\'\?\\";
        static const char *escapec = ",abtnvfr\"\'\?\\";
        // <ctype.h> functions and %o need a non-negative value, but plain
        // char may be signed.
        unsigned char uch = (unsigned char)ch;
        const char *p = strchr(escapev, uch);
        // strchr() also matches the terminating NUL, hence the *p test.
        if (p && *p) {
          return fprintf(outf, "\\%c", escapec[p - escapev]) < 0 ? EOF : 0;
        }
        if (isprint(uch)) {
          return fputc(uch, outf) == EOF ? EOF : 0;
        }
        // Use octal as hex is problematic reading back
        return fprintf(outf, "\\%03o", (unsigned)uch) < 0 ? EOF : 0;
      }
      
      /*
       * Write the string s to outf enclosed in escaped double quotes, with
       * every character passed through EscapePrint_CSV().
       *
       * Returns 0 on success, or the first non-zero (EOF) result reported
       * by EscapePrint_CSV().
       */
      int EscapePrints_CSV(FILE *outf, const char *s) {
        int retval = EscapePrint_CSV(outf, '\"');
        if (retval) return retval;
        while (*s) {
          retval = EscapePrint_CSV(outf, (unsigned char)*s++);
          if (retval) return retval;
        }
        return EscapePrint_CSV(outf, '\"');
      }
      

      不同的 CSV 实现对嵌入的 ',' 的处理方式各不相同。有些实现会对它们进行转义。

      另一些实现则用引号把整个字符串括起来,例如 "Hello, World"。这样一来,如何转义 " 本身就成了问题。有些实现通过简单地重复引号(即 "")来转义 '\"'。

      YMMV。

      【讨论】:

        猜你喜欢
        • 2019-08-23
        • 1970-01-01
        • 1970-01-01
        • 2017-03-09
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多