【问题标题】:ANTLR 3 bug, mismatched input, but what's wrong?(ANTLR 3 报错:mismatched input,问题出在哪里?)
【发布时间】:2014-07-06 19:41:58
【问题描述】:

我遇到了以下问题:我的 ANTLR 3 语法可以编译,但我的简单测试程序无法正常运行。语法如下:

grammar Rietse;

// Grammar-wide ANTLR options.
options {
    // Lookahead depth used for parsing decisions.
    k=1;
    // Target language of the generated lexer/parser.
    language=Java;
    // Parser rules build an AST (this is what enables the ^, ! and -> notations below).
    output=AST;
}

// Named tokens and the literal text each one is bound to.
// PROGRAM, COMPOUND, UNARY, DECL and SDECL are used in this grammar only as
// AST root labels inside rewrite rules (program, comp_expr, unary_expr, declaration).
// NOTE(review): because they are bound to literals here, the words 'program',
// 'compound', 'unary', 'decl' and 'sdecl' presumably also become reserved in the
// input language -- confirm whether literal-free (imaginary) tokens were intended.
tokens {
    // punctuation
    COLON       =   ':'     ;
    SEMICOLON   =   ';'     ;
    OPAREN      =   '('     ;
    CPAREN      =   ')'     ;
    COMMA       =   ','     ;
    OCURLY      =   '{'     ;
    CCURLY      =   '}'     ;
    SINGLEQUOTE =   '\''    ;

    // operators
    BECOMES     =   '='     ;
    PLUS        =   '+'     ;
    MINUS       =   '-'     ;
    TIMES       =   '*'     ;
    DIVIDE      =   '/'     ;
    MODULO      =   '%'     ;

    // comparison and logical operators
    EQUALS      =   '=='    ;
    LT          =   '<'     ;
    LTE         =   '<='    ;
    GT          =   '>'     ;
    GTE         =   '>='    ;
    UNEQUALS    =   '!='    ;
    AND         =   '&&'    ;
    OR          =   '||'    ;
    NOT         =   '!'     ;

    // keywords
    PROGRAM     =   'program'               ;
    COMPOUND    =   'compound'              ;
    UNARY       =   'unary'                 ;
    DECL        =   'decl'                  ;
    SDECL       =   'sdecl'                 ;
    STATIC      =   'static'                ;
    PRINT       =   'print'                 ;
    READ        =   'read'                  ;
    IF          =   'if'                    ;
    THEN        =   'then'                  ;
    ELSE        =   'else'                  ;
    DO          =   'do'                    ;
    WHILE       =   'while'                 ;

    // types
    INTEGER     =   'int'                   ;
    CHAR        =   'char'                  ;
    BOOLEAN     =   'boolean'               ;
    TRUE        =   'true'                  ;
    FALSE       =   'false'                 ;
}

// Code emitted at the top of the generated lexer: puts it in package Eindopdracht.
@lexer::header {
package Eindopdracht;
}

// Code emitted at the top of the generated parser: same package as the lexer.
@header {
package Eindopdracht;
}

// Parser rules

// Entry rule: a program body followed by end of input.
// The rewrite builds a tree rooted at PROGRAM with the body as children; EOF is not kept.
program
    :   program2 EOF
            ->  ^(PROGRAM program2)
    ;

// Program body: one or more groups, each being zero or more declarations
// followed by exactly one statement. A body therefore always ends with a statement.
program2
    :   (declaration* statement)+
    ;

// Variable declaration, optionally prefixed with STATIC.
// Rewrites to ^(SDECL type IDENTIFIER) for static declarations and
// ^(DECL type IDENTIFIER) otherwise; the semicolon is not kept in the tree.
declaration
    :   STATIC type IDENTIFIER SEMICOLON -> ^(SDECL type IDENTIFIER)
    |   type IDENTIFIER SEMICOLON -> ^(DECL type IDENTIFIER)
    ;

// The declarable types: 'int', 'char' or 'boolean'.
type
    :   INTEGER
    |   CHAR
    |   BOOLEAN
    ;

// A single statement. Every alternative -- including while and if -- must be
// followed by a semicolon; the `!` suffix keeps that semicolon out of the AST.
statement
    :   assignment_expr SEMICOLON!
    |   while_stat SEMICOLON!
    |   print_stat SEMICOLON!
    |   if_stat SEMICOLON!
    |   read_stat SEMICOLON!
    ;

// While loop. WHILE becomes the subtree root (`^`); parentheses and braces are dropped (`!`).
// The body is one or more statements, not a full program2 (so no declarations inside).
while_stat
    :   WHILE^ OPAREN! or_expr CPAREN! OCURLY! statement+ CCURLY!  // while (expression) {statement+}
    ;

// Print statement with one or more comma-separated expressions.
// PRINT becomes the subtree root; parentheses and commas are dropped from the AST.
print_stat
    :   PRINT^ OPAREN! or_expr (COMMA! or_expr)* CPAREN!          // print(expression, expression, ...)
    ;

// Read statement taking a comma-separated list of identifiers.
// READ becomes the subtree root; parentheses and commas are dropped from the AST.
// NOTE(review): `(COMMA! IDENTIFIER)+` requires at least TWO identifiers, so
// `read(x)` does not match -- confirm whether `*` was intended (compare print_stat).
read_stat
    :   READ^ OPAREN! IDENTIFIER (COMMA! IDENTIFIER)+ CPAREN!   // read(identifier, identifier, ...)
    ;

// If statement with an optional else branch. Both branches are braced compounds.
// IF becomes the subtree root; parentheses and the ELSE keyword are dropped from the AST.
if_stat
    :   IF^ OPAREN! or_expr CPAREN! comp_expr (ELSE! comp_expr)?     // if (expression) compound else compound
    ;

// Assignment: an expression optionally followed by any number of `= expression` parts.
// Each BECOMES is made a subtree root. Note the grammar does not restrict the
// left-hand side to an IDENTIFIER; any or_expr is accepted syntactically.
assignment_expr
    :   or_expr (BECOMES^ or_expr)*
    ;

// Logical OR level (lowest-precedence expression level below assignment); OR is the subtree root.
or_expr
    :   and_expr (OR^ and_expr)*
    ;

// Logical AND level; AND is the subtree root.
and_expr
    :   compare_expr (AND^ compare_expr)*
    ;

// Comparison level. The `?` allows at most one comparison operator, so
// comparisons cannot be chained (e.g. a < b < c) without parentheses.
compare_expr
    :   plusminus_expr ((LT|LTE|GT|GTE|EQUALS|UNEQUALS)^ plusminus_expr)?
    ;

// Additive level: binary + and -; the operator is the subtree root.
plusminus_expr
    :   timesdivide_expr ((PLUS | MINUS)^ timesdivide_expr)*
    ;

// Multiplicative level: *, / and %; the operator is the subtree root.
timesdivide_expr
    :   unary_expr ((TIMES | DIVIDE | MODULO)^ unary_expr)*
    ;

// Unary level: a bare operand, or a single prefix +, - or ! applied to an operand.
// Prefixed forms are rewritten to ^(UNARY <operator> operand). The prefix applies
// to `operand`, not to `unary_expr`, so prefixes cannot be stacked (e.g. `- - x`).
unary_expr
    :   operand
    |   PLUS operand -> ^(UNARY PLUS operand)
    |   MINUS operand -> ^(UNARY MINUS operand)
    |   NOT operand -> ^(UNARY NOT operand)
    ;

// Atomic operand: boolean literal, character literal, identifier, number,
// or a parenthesised expression (the parentheses are dropped from the AST).
operand
    :   TRUE
    |   FALSE
    |   charliteral
    |   IDENTIFIER
    |   NUMBER
    |   OPAREN! or_expr CPAREN!
    ;

// Braced compound: a program body between { and }, rewritten to ^(COMPOUND program2).
comp_expr
    :   OCURLY program2 CCURLY -> ^(COMPOUND program2)
    ;

// Lexer rules

// Character literal: one LETTER between single quotes, with the quotes dropped (`!`).
// NOTE(review): despite sitting under "Lexer rules", the lowercase name makes this
// a PARSER rule, and it references LETTER, which is declared as a lexer `fragment`
// further down. The errors reported for the input print('a'); are
// "mismatched input 'a' expecting LETTER", i.e. the parser never receives a LETTER
// token here. Turning this into a lexer rule (e.g. CHARLITERAL) is the likely fix.
charliteral
    :   SINGLEQUOTE! LETTER SINGLEQUOTE!
    ;

// Identifier: a letter followed by any number of letters or digits.
IDENTIFIER
    :   LETTER (LETTER | DIGIT)*
    ;

// Unsigned integer literal: one or more digits.
NUMBER
    :   DIGIT+
    ;

// Line comment: '//' up to and including a newline, routed to the hidden channel
// so the parser does not see it.
// NOTE(review): the rule requires a terminating '\n'; a comment on the last line
// of a file without a trailing newline presumably will not match -- confirm.
COMMENT
    :   '//' .* '\n' 
            { $channel=HIDDEN; }
    ;

// Whitespace (space, tab, form feed, CR, LF): routed to the hidden channel.
WS
    :   (' ' | '\t' | '\f' | '\r' | '\n')+
            { $channel=HIDDEN; }
    ;

// Helper character classes. They are declared as `fragment`, so they are meant to be
// referenced from other lexer rules (IDENTIFIER, NUMBER) rather than used as tokens.
fragment DIGIT  :   ('0'..'9') ;
fragment LOWER  :   ('a'..'z') ;
fragment UPPER  :   ('A'..'Z') ;
fragment LETTER :   LOWER | UPPER ;

// EOF

然后我使用以下 Java 文件来测试程序:

package Eindopdracht;

import java.io.FileInputStream;
import java.io.InputStream;

import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.BufferedTreeNodeStream;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.DOTTreeGenerator;
import org.antlr.runtime.tree.TreeNodeStream;
import org.antlr.stringtemplate.StringTemplate;

/**
 * Command-line driver that runs the generated Rietse lexer and parser over a source file.
 *
 * <p>Usage: {@code java Rietse [inputFile]}. When no file argument is given, the
 * program text is read from standard input.
 */
public class Rietse {
    /**
     * Lexes and parses the input, reporting any failure on {@code System.err}.
     *
     * @param args optional; {@code args[0]} is the path of the file to parse
     */
    public static void main (String[] args)
    {
        // Indexing args[0] on an empty array throws ArrayIndexOutOfBoundsException,
        // which made the System.in fallback below unreachable; check the length first.
        String inputFile = args.length > 0 ? args[0] : null;
        try {
            InputStream in = inputFile == null ? System.in : new FileInputStream(inputFile);
            try {
                RietseLexer lexer = new RietseLexer(new ANTLRInputStream(in));
                CommonTokenStream tokens = new CommonTokenStream(lexer);
                RietseParser parser = new RietseParser(tokens);

                // The returned AST is not consumed here; the call is made for the parse itself.
                parser.program();
            } finally {
                // Release the file handle we opened, but never close System.in.
                if (in != System.in) {
                    in.close();
                }
            }

        } catch (RietseException e) {
            System.err.print("ERROR: RietseException thrown by compiler: ");
            System.err.println(e.getMessage());
        } catch (RecognitionException e) {
            System.err.print("ERROR: recognition exception thrown by compiler: ");
            System.err.println(e.getMessage());
            e.printStackTrace();
        } catch (Exception e) {
            System.err.print("ERROR: uncaught exception thrown by compiler: ");
            System.err.println(e.getMessage());
            e.printStackTrace();
        }
    }
}

最后是测试程序本身:

print('a');

现在当我运行它时,我收到以下错误:

line 1:7 mismatched input 'a' expecting LETTER
line 1:9 mismatched input ')' expecting LETTER

我不知道是什么导致了这个错误。我已经尝试了一些改变,但没有解决它。这里有谁知道我的代码有什么问题以及如何修复它? 非常感谢您的每一点帮助,在此先感谢。

祝好,
里恩

【问题讨论】:

    标签: parsing input antlr3 lexer


    【解决方案1】:

    使用规则:

    // Lexer-rule replacement for the parser rule `charliteral`: a single letter or
    // digit between single quotes, matched as one token. The quotes remain part of
    // the token text.
    CHARLITERAL
    :   SINGLEQUOTE (LETTER | DIGIT) SINGLEQUOTE
    ;
    

    并将 operand 规则更改为:

    // Same as the original operand rule, except that the character-literal
    // alternative now refers to the CHARLITERAL token instead of the parser rule.
    operand
    :   TRUE
    |   FALSE
    |   CHARLITERAL
    |   IDENTIFIER
    |   NUMBER
    |   OPAREN! or_expr CPAREN!
    ;
    

    这样就能解决问题。不过,这会使 AST 节点的文本中带有单引号;如有需要,可以调用节点的

    setText(String);

    方法更改节点文本,把引号去掉。

    【讨论】:

      【解决方案2】:

      将 charliteral 转换为词法分析器规则(并将其重命名为 CHARLITERAL)。目前,输入 'a' 会被切分为以下词法单元:SINGLEQUOTE IDENTIFIER SINGLEQUOTE,所以解析器得到的是 IDENTIFIER 而不是 LETTER。

      鉴于您在解析器规则中使用了 fragment 规则(LETTER),我很奇怪这段语法居然能够编译通过。

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2022-11-10
        • 1970-01-01
        • 2023-04-01
        相关资源
        最近更新 更多