【问题标题】:Split unquoted string in C在C中拆分不带引号的字符串
【发布时间】:2016-12-24 19:43:26
【问题描述】:

我正在编写一个函数来将字符串拆分为指向指针的指针,如果分隔符是空格,我只想拆分不在引号内的单词。例如Hello world "not split" 应该返回

Hello
world
"not split"

该函数以某种方式拆分引号内的单词,而不是拆分引号外的单词。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int is_quotes(char *s)
{
    int i;
    int count;

    i = 0;
    count = 0;
    while (s[i])
    {
        if (s[i] == '"')
            count++;
        i++;
    }
    if (count == 0)
        count = 1;
    return (count % 2);
}

int count_words(char *s, char sep)
{
    int check;
    int i;
    int count;

    check = 0;
    if (sep == ' ')
      check = 1;
    i = 0;
    count = 0;
    while (*s && *s == sep)
        ++s;
    if (*s)
        count = 1;
    while (s[i])
    {
        if (s[i] == sep)
        {
          if (!is_quotes(s + i) && check)
          {
            i += 2;
            while (s[i] != 34 && s[i])
                i++;
          }
          count++;
        }
        i++;
    }
    return (count);
}

char    *ft_strsub(char const *s, unsigned int start, size_t len)
{
    char    *sub;

    sub = malloc(len + 1);
    if (sub)
        memcpy(sub, s + start, len);
    return (sub);
}

char        **ft_strsplit(char const *s, char c)
{
    int     words;
    char    *start;
    char    **result;
    int     i;

    words = count_words((char *)s, c);
    if (!s || !c || words == 0)
        return (NULL);
    i = 0;
    result = (char **)malloc(sizeof(char *) * (words + 1));
    start = (char *)s;
    while (s[i])
    {
        if (s[i] == c)
        {
            if (is_quotes((char *)s + i) == 0 && c == ' ')
            {
                i += 2;
                while (s[i] != '"' && s[i])
                    i++;
                i -= 1;
            }
            if (start != (s + i))
                *(result++) = ft_strsub(start, 0, (s + i) - start);
            start = (char *)(s + i) + 1;
        }
        ++i;
    }
    if (start != (s + i))
        *(result++) = ft_strsub(start, 0, (s + i) - start);
    *result = NULL;
    return (result - words);
}

int main(int argc, char **argv)
{
    if (argc > 1)
    {
        char **s;
        s = ft_strsplit(argv[1], ' ');
        int i = 0;
        while (s[i])
            printf("%s\n", s[i++]);
    }
  return 0;
}

当我使用hello world "hello hello" 运行此代码时,我得到以下信息

hello world
"hello
hello"

【问题讨论】:

  • @Olaf 抱歉,我的意思是指向指针 **
  • 使用调试器单步调试您的代码。
  • 代码中没有main函数。
  • 我们不必弄清楚count_words() 的工作原理——你应该向我们展示相关代码。也可以显示main() 函数;它不应该很大,并且会变成 M​​CVE (minimal reproducible example)。在ft_strsplit() 你有:words = count_words((char *)s, c); if (!s || !c || words == 0) return (NULL);——count_words() 是否处理s == 0c == 0 的情况?尽快摆脱不可能的事情。考虑在运行时检查之前添加一个assert(s != 0 &amp;&amp; c != 0); 断言。
  • 对不起,我忘了包括 count_words()main()

标签: c string split


【解决方案1】:

您需要一个具有两种状态的状态机,即引用和关闭引用。当您点击报价时,翻转状态。当您点击空格时,如果不在引号中,则转换为换行符,而不是在引号中。 (您很快就会希望使其更精细以允许字符串转义等,状态机方法可以扩展至此)。

【讨论】:

    【解决方案2】:

    试试这个(修复和减少)

    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    
    typedef struct token {
        const char *top;
        const char *end;//point to next character
    } Token;
    
    Token getToken(const char **sp, char sep){
        const char *s = *sp;
        const char *top, *end;
        Token token = { NULL, NULL};
    
        while(*s && *s == sep)//skip top separators
            ++s;
        if(!*s){
            *sp = s;
            return token;//return null token
        }
        token.top = s;
        while(*s && *s != sep){
            if(*s == '"'){
                char *p = strchr(s + 1, '"');//search end '"'
                if(p)
                    s = p;//skip to '"'
            }
            ++s;
        }
        token.end = s;
        *sp = s;
    
        return token;
    }
    
    int count_words(const char *s, char sep){
        int count = 0;
        Token token = getToken(&s, sep);
    
        while(token.top != NULL){
            ++count;
            token = getToken(&s, sep);
        }
        return count;
    }
    
    char *ft_strsub(Token token){
        size_t len = token.end - token.top;
        char *sub = malloc(len + 1);
        if (sub){
            memcpy(sub, token.top, len);
            sub[len] = 0;
        }
        return sub;
    }
    
    char **ft_strsplit(const char *s, char sep){
        int words;
    
        if (!s || !sep || !(words = count_words(s, sep)))
            return NULL;
    
        char **result = malloc(sizeof(char *) * (words + 1));
        if(!result){
            perror("malloc");
            return NULL;
        }
    
        int i = 0;
        Token token = getToken(&s, sep);
    
        while(token.top != NULL){
            result[i++] = ft_strsub(token);
            token = getToken(&s, sep);
        }
        result[i] = NULL;
    
        return result;
    }
    
    int main(int argc, char **argv){
        const char *text = "Hello world \"not split\"";
        char **s = ft_strsplit(text, ' ');
        int i = 0;
        while (s[i]){
            printf("%s\n", s[i]);
            free(s[i++]);
        }
        free(s);
    
        return 0;
    }
    

    转义字符处理版本。

    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    
    #define ESCAPE '\\' //ESCAPE CHARACTER
    
    typedef struct token {
        const char *top;
        const char *end;//point to next character
    } Token;
    
    Token getToken(const char **sp, char sep){
        const char *s = *sp;
        const char *top, *end;
        Token token = { NULL, NULL};
    
        while(*s && *s == sep)//skip top separators
            ++s;
        if(!*s){
            *sp = s;
            return token;
        }
        token.top = s;
        while(*s && *s != sep){
            if(*s == ESCAPE)
                ++s;
            else if(*s == '"'){
                char *p = strchr(s + 1, '"');//search end '"'
                while(p && p[-1] == ESCAPE)
                    p = strchr(p + 1, '"');
                if(p)
                    s = p;
            }
            ++s;
        }
        token.end = s;
        *sp = s;
    
        return token;
    }
    
    int count_words(const char *s, char sep){
        int count = 0;
        Token token = getToken(&s, sep);
    
        while(token.top != NULL){
            ++count;
            token = getToken(&s, sep);
        }
        return count;
    }
    
    char *remove_escape(char *s){
        char *from, *to;
        from = to = s;
        while(*from){
            if(*from != ESCAPE)
                *to++ = *from;
            ++from;
        }
        *to = 0;
        return s;
    }
    
    char *ft_strsub(Token token){
        size_t len = token.end - token.top;
        char *sub = malloc(len + 1);
        if (sub){
            memcpy(sub, token.top, len);
            sub[len] = 0;
        }
        return sub;
    }
    
    char **ft_strsplit(const char *s, char sep){
        int words;
    
        if (!s || !sep || !(words = count_words(s, sep)))
            return NULL;
    
        char **result = malloc(sizeof(char *) * (words + 1));
        if(!result){
            perror("malloc");
            return NULL;
        }
    
        Token token = getToken(&s, sep);
        int i = 0;
    
        while(token.top != NULL){
            result[i] = ft_strsub(token);
            remove_escape(result[i++]);
            token = getToken(&s, sep);
        }
        result[i] = NULL;
    
        return result;
    }
    
    void test(const char *text){
        printf("original:%s\n", text);
        printf("result of split:\n");
        char **s = ft_strsplit(text, ' ');
        int i = 0;
        while (s[i]){
            printf("%s\n", s[i]);
            free(s[i++]);
        }
        free(s);
        puts("");
    }
    
    int main(int argc, char **argv){
        test("Hello world \"not split\"");
        test("Hello world \"not \\\" split\"");//include " in "..."
        test("Hello world not\\ split");//escape separator
    
        return 0;
    }
    

    结果:

    original:Hello world "not split"
    result of split:
    Hello
    world
    "not split"
    
    original:Hello world "not \" split"
    result of split:
    Hello
    world
    "not " split"
    
    original:Hello world not\ split
    result of split:
    Hello
    world
    not split
    

    【讨论】:

    • 感谢您的修复。
    • 有没有办法让它在这个\ 上不分裂,空间被转义了?比如sound\ 1.mp3
    • @julekgwa 你说的是命令行吗?在 C "\\ "。需要在getToken中加入转义处理。
    • 是的,我正在编写一个小命令行,如果用户键入一个有空格的文件名,我不想将其拆分为 adele\ hello.pm3 之类的部分。
    • @julekgwa 我添加了转义字符处理示例。
    猜你喜欢
    • 2011-04-16
    • 1970-01-01
    • 1970-01-01
    • 2018-05-06
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多