【问题标题】:boost regex error_stack exception thrown from regex_search从 regex_search 抛出的 boost regex error_stack 异常
【发布时间】:2019-03-31 20:52:36
【问题描述】:

当我尝试使用 boost 1.68 正则表达式搜索模式时,我写道:

#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>

#include <boost/regex.hpp>

// Reads the file "flask" into memory and prints every substring that the
// recursive Boost.Regex pattern below recognises as a function call, one per
// line, prefixed with a running match number.
//
// If the matcher throws, the exception message is printed so the cause is
// visible instead of being silently discarded.
int main(int argc, char** argv)
{
    // Grammar expressed as named (?(DEFINE)...) groups; only the trailing
    // (?P>FUNCTION_CALL) actually consumes input. The line continuations keep
    // the pattern a single string literal, so no leading whitespace may be
    // added to the continued lines.
    // NOTE(review): the operator class [\*\+-\/] in EXPRESSION contains the
    // range '+'..'/', so it also accepts ',' and '.'. It is kept verbatim here
    // because this pattern is the reproduction case.
    const std::string pattern("\
(?(DEFINE)(?'NAMESPACE'\\w*::))(?#r)\
(?(DEFINE)(?'CONSTANT'(\"(?:[^\"\\\\]|\\\\.)*\")|(\\d+\\.?\\d*f?)))(?#r)\
(?(DEFINE)(?'VARIABLE'(?P>NAMESPACE)*([A-Za-z_]\\w*\\.)*[A-Za-z_]\\w*))(?#r)\
(?(DEFINE)(?'OPERAND'(\\+|-)*((?P>VARIABLE)|(?P>CONSTANT))))(?#r)\
(?(DEFINE)(?'EXPRESSION'\\s*(?P>OPERAND)\\s*(\\s*[\\*\\+-\\/]\\s*(?P>OPERAND))*))(?#r)\
(?(DEFINE)(?'ARGUMENTS'(?P>EXPRESSION)(,\\s*(?P>EXPRESSION))*))(?#r)\
(?(DEFINE)(?'FUNCTION_CALL'(?P>VARIABLE)\\(\\s*(?P>ARGUMENTS)?\\s*\\)))(?#r)\
(?P>FUNCTION_CALL)");
    std::cout << "pattern: " << pattern << std::endl;
    boost::regex simple_function(pattern, boost::regex_constants::perl);

    std::ifstream file("flask");
    if (file.is_open()) {
        // Slurp the whole file; the extra parentheses avoid the most vexing parse.
        std::string context((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        boost::smatch results;
        boost::match_flag_type flags = boost::match_default | boost::match_single_line;
        auto start = context.cbegin();
        auto end = context.cend();
        int line_n = 0;
        try {
            while (start < end && boost::regex_search(start, end, results, simple_function, flags)) {
                std::cout << '#' << line_n++ << ' ';
                std::cout << results[0] << std::endl;
                // Resume after the match; step one character past an empty
                // match so the loop always makes progress.
                start = (results[0].length() == 0) ? results[0].first + 1 : results[0].second;
            }
        }
        catch (const std::exception& e) {
            // The exception reported for this program wraps std::runtime_error,
            // so what() carries the library's description of the failure.
            std::cout << "exception thrown: " << e.what() << std::endl;
        }
        catch (...) {
            std::cout << "exception thrown (unknown type)." << std::endl;
        }
    }
    return 0;
}

运行时,它在内存位置 0x00000073378FE638 处抛出:boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<std::runtime_error>>。这是一个 error_stack 错误。

但我不知道为什么:我用相同的 context 和模式在 regex101 和 regextester 上测试了我的表达式,只有我的程序会失败并抛出异常。是我做错了什么,还是误解了 boost regex 的用法?有什么办法可以避免 error_stack 吗?

【问题讨论】:

  • 你看过异常消息的内容是什么(而不仅仅是它的类型)吗? std::regex 和 boost::regex 并不能完全互相替代。
  • 我在追踪 boost 后更新了我的描述。看起来 boost 用完了堆栈,抛出一个 error_stack 异常。有什么办法可以避免这种情况?
  • 也许使用更简单的表达方式。

标签: c++ regex boost


【解决方案1】:

有趣。为此,我必须学习 Regex 的全新领域。对此表示敬意。

问题出在递归表达式(Recursive Expressions)上。您必须非常确定这些模式不会无节制地递归,否则要么陷入无限递归,要么——就像这里的情况——虽然“只是”有限递归,但输入一长,递归深度很容易变得非常大。

所以,我先整理了一下:

// The recursive function-call grammar written with raw string literals, so the
// regex needs no C++ escaping. Each (?(DEFINE)...) group only declares a named
// sub-pattern; the final (?P>FUNCTION_CALL) is the part that actually matches.
const std::string pattern(
        R"((?(DEFINE)(?'NAMESPACE'\w*::)))"
        R"((?(DEFINE)(?'CONSTANT'("(?:[^"\\]|\\.)*")|(\d+\.?\d*f?))))"
        R"((?(DEFINE)(?'VARIABLE'(?P>NAMESPACE)*([A-Za-z_]\w*\.)*[A-Za-z_]\w*)))"
        R"((?(DEFINE)(?'OPERAND'(\+|-)*((?P>VARIABLE)|(?P>CONSTANT)))))"
        // The hyphen comes last in the operator class so it is a literal '-'.
        // Written as `\+-\/` it would form the range '+'..'/', which also
        // matches ',' and '.' and lets an EXPRESSION swallow argument separators.
        R"((?(DEFINE)(?'EXPRESSION'\s*(?P>OPERAND)\s*(\s*[\*\+\/-]\s*(?P>OPERAND))*)))"
        R"((?(DEFINE)(?'ARGUMENTS'(?P>EXPRESSION)(,\s*(?P>EXPRESSION))*)))"
        R"((?(DEFINE)(?'FUNCTION_CALL'(?P>VARIABLE)\(\s*(?P>ARGUMENTS)?\s*\))))"
        R"((?P>FUNCTION_CALL))");

现在我开始“理解”该模式,我决定我可能不会将 Regex 用于语法¹,并在 Spirit X3 中重写它:

// Spirit X3 parsers for function-call text. The parser names follow the named
// groups of the regex pattern this grammar replaces.
namespace rules {
    using namespace x3;

    // A single identifier character: alphanumeric or underscore.
    auto WORD            = (alnum | char_('_'));
    // One or more WORD characters followed by "::".
    // NOTE(review): not wrapped in lexeme[], so under skip(space) whitespace
    // may be skipped between the characters of the name.
    auto NAMESPACE       = +WORD >> "::";
    // Either a double-quoted string (any characters except '"', no escape
    // handling) or a floating-point number.
    auto CONSTANT        = ( lexeme [ '"' >> *~char_('"') >> '"' ] | double_ );
    // Identifier: a letter or underscore followed by WORD characters, matched
    // without skipping whitespace.
    auto ident           = lexeme [ char_("A-Za-z_") >> *WORD ];
    // Zero or more namespace qualifiers, then identifiers separated by '.'.
    auto VARIABLE        = *NAMESPACE >> ident % '.';
    // Any number of leading '+'/'-' signs, then a variable or a constant.
    auto OPERAND         = *(char_("+-")) >> (VARIABLE | CONSTANT);
    // Operands separated by one of the binary operators * + / -.
    auto EXPRESSION      = OPERAND % char_("*+/-");
    // Comma-separated expressions.
    auto ARGUMENTS       = EXPRESSION % ',';
    // A variable followed by a parenthesised, optional argument list.
    auto FUNCTION_CALL   = VARIABLE >> '(' >> -ARGUMENTS >> ')';

    // Entry point: skips whitespace between tokens and, via raw[], exposes the
    // matched source text as a std::string.
    auto simple_function = rule<struct simple_function_, std::string> {"simple_function"}
                         = skip(space) [ x3::raw[FUNCTION_CALL] ];
}

现在,由于在更多相关位置接受空格(skip 与 lexeme 的区别²),这个版本更加准确。此外,它显然不会遇到糟糕的回溯问题:

Live On Wandbox

#include <iostream>
#include <fstream>
#include <sstream>
#include <iterator>
#include <string>
#include <boost/regex.hpp>
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;

// Spirit X3 parsers for function-call text. The parser names follow the named
// groups of the regex pattern this grammar replaces.
namespace rules {
    using namespace x3;

    // A single identifier character: alphanumeric or underscore.
    auto WORD            = (alnum | char_('_'));
    // One or more WORD characters followed by "::".
    // NOTE(review): not wrapped in lexeme[], so under skip(space) whitespace
    // may be skipped between the characters of the name.
    auto NAMESPACE       = +WORD >> "::";
    // Either a double-quoted string (any characters except '"', no escape
    // handling) or a floating-point number.
    auto CONSTANT        = ( lexeme [ '"' >> *~char_('"') >> '"' ] | double_ );
    // Identifier: a letter or underscore followed by WORD characters, matched
    // without skipping whitespace.
    auto ident           = lexeme [ char_("A-Za-z_") >> *WORD ];
    // Zero or more namespace qualifiers, then identifiers separated by '.'.
    auto VARIABLE        = *NAMESPACE >> ident % '.';
    // Any number of leading '+'/'-' signs, then a variable or a constant.
    auto OPERAND         = *(char_("+-")) >> (VARIABLE | CONSTANT);
    // Operands separated by one of the binary operators * + / -.
    auto EXPRESSION      = OPERAND % char_("*+/-");
    // Comma-separated expressions.
    auto ARGUMENTS       = EXPRESSION % ',';
    // A variable followed by a parenthesised, optional argument list.
    auto FUNCTION_CALL   = VARIABLE >> '(' >> -ARGUMENTS >> ')';

    // Entry point: skips whitespace between tokens and, via raw[], exposes the
    // matched source text as a std::string.
    auto simple_function = rule<struct simple_function_, std::string> {"simple_function"}
                         = skip(space) [ x3::raw[FUNCTION_CALL] ];
}

// Loads the file "flask" into memory, collects the source text of every
// function call the grammar finds in it, and prints one call per line.
int main()
{
    using char_iter = std::istreambuf_iterator<char>;

    std::ifstream input("flask");
    const std::string source(char_iter{input}, char_iter{});

    // seek[] advances to the next position where simple_function matches; the
    // Kleene star repeats that and appends each match to `found`.
    std::vector<std::string> found;
    parse(source.begin(), source.end(), *x3::seek[rules::simple_function], found);

    for (auto it = found.begin(); it != found.end(); ++it)
        std::cout << *it << "\n";
}

打印输出:

anno::copyright_notice("XXXXX")
anno::author("Someone")
anno::contributor("")
state::texture_coordinate(0)
state::texture_tangent_u(0)
state::texture_tangent_v(0)

¹我知道 Perl6 很棒,但仍然

²Boost spirit skipper issues

更新/奖励

只是为了展示 Spirit X3 除了匹配文本之外还能做什么,这里对前面快速移植的版本做了一点改进,展示如何使用相同的规则解析出强类型的 AST 数据类型。

所做的更改:

  • 修复了命名空间限定符中的标识符没有使用 lexeme 的错误
  • 同时使名称空间的标识符解析一致(很可能,名称空间名称也不能以数字字符开头)
  • 解析为强类型数据类型 AST::Variable、AST::Literal(用于字符串或数字文字)和 AST::FunctionCall
  • 支持字符串文字中的转义。这意味着 "A\"B" 现在将被正确解析为包含 A"B 的 AST::Literal
  • 如果您检查调试输出 (#define BOOST_SPIRIT_X3_DEBUG),您实际上可以看到这些文字正在被解析

Live On Wandbox

//#define BOOST_SPIRIT_X3_DEBUG
#include <iostream>
#include <fstream>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
namespace x3 = boost::spirit::x3;

// Strongly typed results produced by the function-call grammar.
namespace AST {
    // A name made of namespace qualifiers followed by dot-separated objects.
    struct Variable {
        std::vector<std::string> namespaces, nested_objects;

        // Streams every namespace as "[ns]::" and then the nested object names
        // as "[obj]" joined with '.'. Returns the stream to allow chaining.
        friend std::ostream& operator<<(std::ostream& os, Variable const& v) {
            // Iterate by const reference: printing must not copy each string.
            for (auto const& ns : v.namespaces)
                os << '[' << ns << "]::";
            bool first = true;
            for (auto const& obj : v.nested_objects) {
                os << (first?"":".") << '[' << obj << ']';
                first = false;
            }
            return os;
        }
    };

    // A literal value: either a string or a number.
    using Literal = boost::variant<std::string, double>;

    // A call: the callee's name plus the source text of each argument.
    struct FunctionCall {
        Variable name;
        std::vector<std::string> arguments;
    };
}

// Adapt the AST structs as Boost.Fusion sequences. Members are listed in the
// order the matching rules parse them: namespaces before nested objects for a
// variable, and the name before the arguments for a function call.
BOOST_FUSION_ADAPT_STRUCT(AST::Variable, namespaces, nested_objects)
BOOST_FUSION_ADAPT_STRUCT(AST::FunctionCall, name, arguments)

// Spirit X3 grammar for function calls; the rules carry attribute types so the
// same grammar can fill the AST structs.
namespace rules {
    using namespace x3;

    // Identifier: a letter or '_' followed by alphanumerics or '_', matched
    // without skipping whitespace; raw[] yields the matched text as the string.
    auto ident           = rule<struct ident_, std::string> {"ident"}
                         = lexeme [ raw [ (alpha|'_') >> *(alnum|'_') ] ];
    // Namespace qualifier: an identifier followed by "::".
    auto namespace_      = rule<struct namespace_, std::string> {"namespace_"}
                         = ident >> "::";
    // Double-quoted string. A backslash followed by any character is accepted
    // as an escape (only the escaped character is kept); otherwise any
    // character except '"' is taken.
    auto quoted_str      = rule<struct quoted_str_, std::string> {"quoted_str"}
                         = lexeme [ '"' >> *('\\' >> char_ | ~char_('"')) >> '"' ];
    // Literal: a quoted string or a floating-point number.
    auto constant        = rule<struct constant_, AST::Literal> {"constant"}
                         = quoted_str | double_;
    // Zero or more namespace qualifiers, then identifiers separated by '.'.
    auto variable        = rule<struct variable_, AST::Variable> {"variable"}
                         = *namespace_ >> ident % '.';
    // Any number of leading '+'/'-' signs, then a variable or a constant.
    // Declared without an attribute type, so it only matches.
    auto operand         = rule<struct operand_> {"operand"}
                         = *char_("+-") >> (variable | constant);
    // Operands separated by one of * + / -; raw[] exposes the source text of
    // the whole expression as a string.
    auto expression      = rule<struct expression_, std::string> {"expression"}
                         = raw [ operand % char_("*+/-") ];
    // Comma-separated expressions.
    auto arguments       = expression % ',';
    // A variable followed by a parenthesised, optional argument list.
    auto function_call   = rule<struct function_call_, AST::FunctionCall> {"function_call"}
                         = variable >> '(' >> -arguments >> ')';

    // Entry point: function_call with whitespace skipped between tokens.
    auto simple_function = skip(space) [ function_call ];
}

int main()
{
    // parsing the raw sources out as string
    {
        std::ifstream file("flask");
        boost::spirit::istream_iterator f(file), l;

        std::vector<std::string> src;
        parse(f, l, *x3::seek[x3::raw[rules::simple_function]], src);

        for (auto& call : src)
            std::cout << call << "\n";
    }

    // parsing AST::FunctionCall objects
    {
        std::ifstream file("flask");
        boost::spirit::istream_iterator f(file), l;

        std::vector<AST::FunctionCall> parsed;
        parse(f, l, *x3::seek[rules::simple_function], parsed);

        for (auto& call : parsed) {
            std::cout << call.name << "\n";
            for (auto& argument : call.arguments)
                std::cout << " - argument: " << argument << "\n";
        }
    }
}

同时打印“源”解析和“AST”解析:

anno::copyright_notice("XXXXX")
anno::author("Som\"e\"one")
anno::contributor("")
state::texture_coordinate(0)
state::texture_tangent_u(0)
state::texture_tangent_v(0)
[anno]::[copyright_notice]
 - argument: "XXXXX"
[anno]::[author]
 - argument: "Som\"e\"one"
[anno]::[contributor]
 - argument: ""
[state]::[texture_coordinate]
 - argument: 0
[state]::[texture_tangent_u]
 - argument: 0
[state]::[texture_tangent_v]
 - argument: 0

【讨论】:

  • 添加了一个更新/奖励,显示解析为强类型 AST,并进行了一些改进。见Live On Wandbox
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 2018-06-25
  • 1970-01-01
  • 1970-01-01
  • 2023-03-28
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
相关资源
最近更新 更多