【问题标题】:How do I parse comma-delimited string in C++ with some elements being quoted with commas?如何在 C++ 中解析逗号分隔的字符串,其中一些带引号的元素内部包含逗号?
【发布时间】:2021-06-24 22:52:58
【问题描述】:

我有一个逗号分隔的字符串,我想将它存储在字符串向量中。字符串和向量是:

string s = "1, 10, 'abc', 'test, 1'";
vector<string> v;

理想情况下,我希望字符串 'abc' 和 'test, 1' 在存储时去掉单引号,如下所示;不过即使连同单引号一起存储,我也可以接受:

v[0] = "1";
v[1] = "10";
v[2] = "abc";
v[3] = "test, 1";

【问题讨论】:

  • 这能回答你的问题吗? How do I iterate over the words of a string?
  • 在思考这个问题的同时,还要考虑一个单词中需要同时包含单引号和双引号(而不仅仅是逗号)的情况。想想 C++ 语法是如何用反斜杠加双引号作为转义序列来插入双引号的。您的文件需要有自己的语法,并且您的解析逻辑需要理解该语法(而不仅仅是逗号)。

标签: c++ string csv c++11


【解决方案1】:
// Extracts the next comma-separated token from `s`, beginning the scan at
// `start`.
//
// Leading spaces and tabs are skipped. A token that opens with a single quote
// extends to the next single quote (quotes excluded, commas allowed inside);
// any other token extends to the first space, tab or comma. On success `token`
// holds the text and `start` is moved just past the comma that follows it, or
// to s.size() when no further comma exists.
//
// Returns false, with `token` empty and `start` set to string::npos, when only
// spaces/tabs (or nothing) remain.
bool nextToken(const string &s, string::size_type &start, string &token)
{
    token.clear();

    start = s.find_first_not_of(" \t", start);
    if (start == string::npos)
        return false;

    // Work out where the token text stops.
    const bool quoted = (s[start] == '\'');
    if (quoted)
        ++start; // the opening quote is not part of the token
    const string::size_type stop =
        quoted ? s.find('\'', start) : s.find_first_of(" \t,", start);

    // No terminator at all: the token is everything that is left.
    if (stop == string::npos)
    {
        token = s.substr(start);
        start = s.size();
        return true;
    }

    token = s.substr(start, stop - start);

    // The token may have stopped on a blank or a closing quote; whatever lies
    // between there and the separating comma is skipped.
    string::size_type comma = stop;
    if (s[stop] != ',')
        comma = s.find(',', stop + 1);

    start = (comma == string::npos) ? s.size() : comma + 1;
    return true;
}
// Usage example: split the sample line into its fields.
string s = "1, 10, 'abc', 'test, 1'";
string token;
vector<string> v;

// Each successful nextToken call stores one field in `token` and moves
// `start` forward; the loop stops when the call returns false.
string::size_type start = 0;
while (nextToken(s, start, token))
{
    v.push_back(token);
}

Demo

【讨论】:

    【解决方案2】:

    您在这里需要做的是自己编写一个解析器,让它按照您的需要进行解析。这里我为您写了一个解析函数:

    #include <string>
    #include <vector>
    using namespace std;
    
    // Splits a comma-separated line into fields, honouring single-quoted fields.
    //
    // Outside quotes a comma ends the current field and every whitespace
    // character is discarded. Between a pair of single quotes all characters
    // (commas and whitespace included) are kept verbatim; the quotes themselves
    // are dropped and the closing quote ends the field. Empty fields are never
    // returned.
    //
    // master: the line to split.
    // Returns the non-empty fields in order of appearance.
    vector<string> parse_string(string master) {
        vector<string> result;
        string field;            // field currently being accumulated
        bool in_quotes = false;  // true between an opening and a closing quote

        // Stores the accumulated field (unless it is empty) and starts a new one.
        // Filtering here replaces a post-pass that erased while indexing and
        // therefore skipped the element following each erased one.
        const auto flush = [&result, &field]() {
            if (!field.empty()) result.push_back(field);
            field.clear();
        };

        for (const char ch : master) {
            if (ch == '\'') {
                in_quotes = !in_quotes;
                if (!in_quotes) flush();  // a closing quote terminates the field
            } else if (in_quotes) {
                field.push_back(ch);      // quoted text is kept as-is, commas included
            } else if (ch == ',') {
                flush();
            } else if (!isspace(static_cast<unsigned char>(ch))) {
                // The cast matters: isspace is undefined for negative char values.
                field.push_back(ch);
            }
        }

        // The final field has no trailing comma or quote to terminate it, so it
        // must be stored explicitly or it would be lost.
        flush();
        return result;
    }
    

    这会根据您的需要解析您的字符串,并返回一个向量。然后,您可以在程序中使用它:

    #include <iostream>
    // Demo driver: parses the sample line and prints one field per line.
    int main() {
        const string s = "1, 10, 'abc', 'test, 1'";
        const vector<string> v = parse_string(s);

        // Range-for avoids comparing a signed index with the unsigned size();
        // '\n' ends the line without flushing the stream after every field.
        for (const string& field : v) {
            cout << field << '\n';
        }
    }
    

    并正确打印出来:

    1
    10
    abc
    test, 1
    

    【讨论】:

    • 处理引号的那个 case 没有 break,如果是故意的,我建议使用 [[fallthrough]]
    • 是的,是故意的,我一会儿补上
    【解决方案3】:

    一个合适的解决方案需要一个解析器实现。如果您需要快速破解,只需编写一个单元格读取函数 (demo)。 c++14 的std::quoted 操纵器在这里有很大的帮助。唯一的问题是操纵器需要一个流。这很容易用istringstream 解决——见第二个函数。请注意,您的字符串格式为CELL COMMA CELL COMMA... CELL

    // Reads one cell from `is` into `s`.
    //
    // Leading whitespace is skipped. A cell whose first character is a single
    // quote is read with quoted() (delimiter ', escape \), so it may contain
    // commas. Any other cell runs up to the next comma or the end of the stream;
    // the comma itself is left in the stream for the caller to consume.
    //
    // Returns `is`, which is in a failed state when no cell could be read.
    istream& get_cell(istream& is, string& s)
    {
      char c = '\0';
      if (!(is >> c)) // skips ws; fails when nothing but whitespace is left
        return is;    // do not inspect a character that was never read
      is.unget(); // puts back in the stream the last read character

      if (c == '\'')
        return is >> quoted(s, '\'', '\\'); // the first character of the cell is ' - read quoted

      // Read unquoted. getline consumes the comma, so it is put back for get() -
      // but only when a comma was really found: if the cell ended at the end of
      // the stream, unget() would push the cell's own last character back.
      if (getline(is, s, ',') && !is.eof())
        is.unget();
      return is;
    }
    
    
    // Splits `s`, formatted as CELL COMMA CELL COMMA ... CELL, into its cells.
    //
    // Throws the string literal "ill formed" (caught as const char*) when a cell
    // is followed by anything other than a comma or the end of the input.
    vector<string> get(const string& s)
    {
      istringstream iss{ s };
      string cell;
      vector<string> r;
      while (get_cell(iss, cell))
      {
        r.push_back( cell );

        char separator = '\0';
        if (!(iss >> separator))
          break; // only whitespace (or nothing) follows the cell - end of input
        if (separator != ',')
          throw "ill formed"; // a cell must be followed by a cell separator
      }

      return r;
    }
    

    这就是你使用它的方式:

    // Demo driver: parses the sample line; if get() throws a const char*, the
    // message is written to standard output.
    int main()
    {
      const string s{ "1, 10, 'abc', 'test, 1'" };

      try
      {
        const auto v = get(s); // result is not used further in this demo
      }
      catch (const char* message)
      {
        cout << message;
      }
    }
    

    【讨论】:

      猜你喜欢
      • 2021-11-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2010-11-14
      • 2011-02-20
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多