【问题标题】:In boost::spirit::lex, how do I add tokens with a semantic action and a token ID?在 boost::spirit::lex 中,如何添加具有语义操作和令牌 ID 的令牌?
【发布时间】:2017-01-03 09:47:55
【问题描述】:

我知道如何添加带有 ID(标识符)的令牌定义:

this->self.add(identifier, ID_IDENTIFIER);

我也知道如何添加带有语义操作的令牌定义:

this->self += whitespace [ lex::_pass = lex::pass_flags::pass_ignore ];

不幸的是,这不起作用:

this->self.add(whitespace
                   [ lex::_pass = lex::pass_flags::pass_ignore ],
               ID_IDENTIFIER);

提示无法将令牌转换为字符串(!?):

错误 C2664: 'const boost::spirit::lex::detail::lexer_def_>::adder &boost::spirit::lex::detail::lexer_def_>::adder::operator () (wchar_t,unsigned int) const' : 无法将参数 1 从 'const boost::proto::exprns_::expr' 转换为 'const std::basic_string,std::allocator> &'

有趣的是,lexer.hpp 中的adder 有一个operator (),它将一个动作作为第三个参数——但它在我的 boost (1.55.0) 版本中被注释掉了。这适用于较新的版本吗?

如果没有这个,我将如何向词法分析器添加带有语义操作和 ID 的标记定义?

【问题讨论】:

    标签: c++ boost boost-spirit lexer


    【解决方案1】:

    查看头文件似乎至少有两种可能的方法:

    • 您可以在定义令牌后,使用 token_def 的 id 成员函数来设置 ID:

      ellipses = "\\.\\.\\.";
      ...
      ellipses.id(ID_ELLIPSES);
      
    • 定义令牌时,可以使用 token_def 的双参数构造函数:

      number = lex::token_def<>("[0-9]+", ID_NUMBER);
      

    然后您可以像以前一样简单地添加语义操作:

    this->self = ellipses[phx::ref(std::cout) << "Found ellipses.\n"] | '(' | ')' | number[phx::ref(std::cout) << "Found: " << phx::construct<std::string>(lex::_start, lex::_end) << '\n'];
    

    下面的代码基于 Boost.Spirit.Lex 的 example3.cpp,稍作改动(标有 //CHANGED)即可实现您想要的效果。

    完整示例(可在 rextester 上运行)

    #include <iostream>
    #include <string>
    
    #include <boost/config/warning_disable.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    
    
    
    
    using namespace boost::spirit;
    namespace phx = boost::phoenix;
    
    // Explicit token IDs shared by the lexer and the grammar.
    // Kept as an unscoped enum so the values convert implicitly where an
    // integral ID is expected. Numbering starts right after lex::min_token_id.
    enum token_id //ADDED
    {
        ID_ELLIPSES = lex::min_token_id + 1,  // ID given to the "..." token
        ID_NUMBER   = ID_ELLIPSES + 1         // ID given to the "[0-9]+" token
    };
    
    ///////////////////////////////////////////////////////////////////////////////
    //  Token definition
    ///////////////////////////////////////////////////////////////////////////////
    // Lexer definition. In the default state it recognises "...", '(', ')' and
    // runs of decimal digits; in the "WS" state it recognises whitespace and
    // C-style comments. The ellipses and number tokens carry explicit IDs and
    // each has a semantic action that writes a message to std::cout.
    template <typename Lexer>
    struct example3_tokens : lex::lexer<Lexer>
    {
        example3_tokens()
          : ellipses("\\.\\.\\.", ID_ELLIPSES) //CHANGED
          , number("[0-9]+", ID_NUMBER)        //CHANGED
        {
            // tokens of the default lexer state, with their semantic actions
            this->self
                =   ellipses[phx::ref(std::cout) << "Found ellipses.\n"]
                |   '('
                |   ')'
                |   number[phx::ref(std::cout)
                               << "Found: "
                               << phx::construct<std::string>(lex::_start, lex::_end)
                               << '\n']
                ;   //CHANGED

            // tokens of the "WS" lexer state: spaces, tabs, newlines and
            // C-style comments
            this->self("WS")
                =   lex::token_def<>("[ \\t\\n]+")
                |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
                ;
        }

        // token_def<> with default template arguments; `identifier` is declared
        // but never given a pattern or added to the lexer
        lex::token_def<> ellipses, identifier, number;
    };
    
    ///////////////////////////////////////////////////////////////////////////////
    //  Grammar definition
    ///////////////////////////////////////////////////////////////////////////////
    // Grammar over the token stream produced by the lexer. Tokens are matched
    // by their ID through qi::token(); skipping uses qi::in_state_skipper<Lexer>.
    template <typename Iterator, typename Lexer>
    struct example3_grammar
      : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
    {
        // The token-definition argument is accepted so existing call sites keep
        // working, but it is not consulted: the rules refer to token IDs only.
        template <typename TokenDef>
        example3_grammar(TokenDef const& /*tok*/)
          : example3_grammar::base_type(start)
        {
            // couplet: a single number, or one or more couplets enclosed in
            // parentheses, e.g.
            //    (1) (1 2) (1 2 3)
            //    ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4)))
            couplet = qi::token(ID_NUMBER) | ('(' >> +couplet >> ')'); //CHANGED

            // start: one or more items, each either a couplet or an ellipses token
            start = +(couplet | qi::token(ID_ELLIPSES)); //CHANGED

            BOOST_SPIRIT_DEBUG_NODE(couplet);
            BOOST_SPIRIT_DEBUG_NODE(start);
        }

        qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
    };
    
    ///////////////////////////////////////////////////////////////////////////////
    int main()
    {
        // iterator type used to expose the underlying input stream
        typedef std::string::iterator base_iterator_type;
    
        // This is the token type to return from the lexer iterator
        typedef lex::lexertl::token<base_iterator_type> token_type;
    
        // This is the lexer type to use to tokenize the input.
        // Here we use the lexertl based lexer engine.
        typedef lex::lexertl::actor_lexer<token_type> lexer_type; //CHANGED
    
        // This is the token definition type (derived from the given lexer type).
        typedef example3_tokens<lexer_type> example3_tokens;
    
        // this is the iterator type exposed by the lexer 
        typedef example3_tokens::iterator_type iterator_type;
    
        // this is the type of the grammar to parse
        typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;
    
        // now we use the types defined above to create the lexer and grammar
        // object instances needed to invoke the parsing process
        example3_tokens tokens;                         // Our lexer
        example3_grammar calc(tokens);                  // Our parser
    
        std::string str ="(1) (1 2) (1 2 3) ... ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ... (((1))) ..."; //CHANGED
    
        // At this point we generate the iterator pair used to expose the
        // tokenized input stream.
        std::string::iterator it = str.begin();
        iterator_type iter = tokens.begin(it, str.end());
        iterator_type end = tokens.end();
    
        // Parsing is done based on the token stream, not the character 
        // stream read from the input.
        // Note how we use the lexer defined above as the skip parser.
        bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
    
        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "-------------------------\n";
        }
        else
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    
        std::cout << "Bye... :-) \n\n";
        return 0;
    }
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2021-06-03
      • 1970-01-01
      • 1970-01-01
      • 2018-03-29
      • 1970-01-01
      相关资源
      最近更新 更多