【发布时间】:2014-05-27 11:10:10
【问题描述】:
我使用 ANTLR4 语法 编写了一个简单的词法分析器和解析器,为 NetBeans 7.3 制作了一个语言插件,以帮助团队更快地编写我们的布局文件(XHTML 和小部件定义的混合形式)的 XHTML 标记,但具有自定义属性、特征,并且与 XHTML 语法有一些差异)。
模板文件示例:
<div style="dyn_layout_panel">
@symbol@
<w_label=label, text="Try to close this window" />
<w_buttonclose=button, text = "CLOSE", on_press=press_close />
<w_buttonterminate=button, text="TERMINATE", on_press=press_terminate />
<w_mydatepicker=datepicker, parent=tab0, ary=[10, "str", /regex/i], start_date=2013-10-05, on_selected=datepicker_selected />
<w_myeditbox=editbox, parent=tab0, validation=USER_REGEX, validation_regex=/^[0-9]+[a-z]*$/i,
validation_msg="User regex don't match editbox contents.", on_keyreturn=tab0_editbox_keyreturn />
<div style="dyn_layout_panel">
$SYMBOL_2$
Some text that make a text node.
</div>
</div>
我使用 AnltrWorks 2 编写和调试词法分析器和解析器,一切似乎都很好,在 NetBeans 中我也没有遇到任何异常,并且解析器工作正常,但 在编辑/键入期间,我在光标。
问题截图:
为每次击键添加调试控制台输出我看到词法分析器正确进入 IN_TAG 或 IN_WIDGET 模式,但在 WHITESPACE 之后返回到默认模式并匹配标记内的其余文本作为 TEXT_NODE 标记。
我知道词法分析器一次只能有一个活动模式,所以因为它在 IN_TAG 或 IN_WIDGET 模式下匹配 TEXT_NODE 规则?
词法分析器语法文件:
lexer grammar LayoutLexer;
COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
;
WS : ( ' '
| '\t'
| EOL
)+? -> channel(HIDDEN)
;
WDG_START_OPEN : '<w_' PROPERTY -> pushMode(IN_WIDGET) ;
WDG_END_OPEN : '</w_' PROPERTY -> pushMode(IN_WIDGET) ;
TAG_START_OPEN : '<' ATTRIBUTE -> pushMode(IN_TAG) ;
TAG_END_OPEN : '</' ATTRIBUTE -> pushMode(IN_TAG) ;
EXT_REF
: ( ('@' REF_NAME '@') | ('$' SYMBOL '$') | ('§' REF_NAME '§') )
;
fragment
REF_NAME
: ( [a-z]+ [0-9a-z_]*? )
;
fragment
EOL : ( '\r\n' | '\n\r' | '\n' )
;
EQUAL
: '='
;
TEXT_NODE
: ( (~('\r'|'\n'|'<'|'@'|'$'|'§'))+ )
;
ERROR
: ( .+? )
;
mode IN_TAG;
TAG_CLOSE : '>' -> popMode ;
TAG_EMPTY_CLOSE : '/>' -> popMode ;
TAG_WS : WS -> type(WS), channel(HIDDEN) ;
TAG_COMMENT : COMMENT -> type(COMMENT), channel(HIDDEN) ;
TAG_EQ : EQUAL -> type(EQUAL) ;
ATTRIBUTE
: ( LITERAL [0-9a-zA-Z_]* )
;
VAL
: ( '"' ( ESC_SEQ | ~('\\'|'"') )*? '"'
| '\'' ( ESC_SEQ | ~('\\'|'\'') )*? '\'' )
;
TAG_ERR : ERROR -> type(ERROR) ;
mode IN_WIDGET;
WDG_CLOSE : '>' -> popMode ;
WDG_EMPTY_CLOSE : '/>' -> popMode ;
WDG_WS : WS -> type(WS), mode(IN_WIDGET), channel(HIDDEN) ;
WDG_COMMENT : COMMENT -> type(COMMENT), channel(HIDDEN) ;
WDG_EQ : EQUAL -> type(EQUAL), pushMode(WDG_ASSIGN) ;
COMMA
: ','
;
fragment
MINUS
: '-'
;
STRING
: ( '"' ( ESC_SEQ | ~('\\'|'"') )*? '"'
| '\'' ( ESC_SEQ | ~('\\'|'\'') )*? '\'' )
;
fragment
ESC_SEQ
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UNICODE_ESC
| OCTAL_ESC
;
fragment
OCTAL_ESC
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
fragment
UNICODE_ESC
: '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
fragment
HEX_DIGIT
: [0-9a-fA-F]
;
fragment
DIGIT
: [0-9]
;
fragment
HEX_NUMBER
: '0x' HEX_DIGIT+
;
fragment
HTML_NUMBER
: (INT_NUMBER | FLOAT_NUMBER) HTML_UNITS
;
fragment
FLOAT_NUMBER
: MINUS? INT_NUMBER '.' DIGIT+
;
fragment
INT_NUMBER
: MINUS? DIGIT+
;
EVENT_HANDLER
: 'on_' PROPERTY
;
PROPERTY
: ( LITERAL [0-9a-zA-Z_]* )
;
fragment
LITERAL
: ( LITERAL_U | LITERAL_L )
;
fragment
LITERAL_U
: [A-Z]+
;
fragment
LITERAL_L
: [a-z]+
;
WDG_ERR : ERROR -> type(ERROR) ;
mode WDG_ASSIGN;
PHP_REF
: ( LITERAL_L ('_' | LITERAL_L | [0-9])* ) -> popMode
;
VALUE : (WDG_VAL | ARRAY) -> popMode;
ASGN_WS : WS -> type(WS), channel(HIDDEN);
ASGN_COMMA : COMMA -> type(COMMA);
ARY_START
: '['
;
ARY_END
: ']'
;
BIT_OR
: '|'
;
ARRAY
: ARY_START ARY_VALUE (ASGN_COMMA ARY_VALUE)* ARY_END
;
fragment
ARY_VALUE : ASGN_WS? WDG_VAL ASGN_WS? -> type(VALUE);
fragment
WDG_VAL
: (STRING
| UTC_DATE
| HEX_NUMBER
| HTML_NUMBER
| FLOAT_NUMBER
| INT_NUMBER
| BOOLEAN
| BITFIELD
| REGEX
| CSS_CLASS)
;
fragment
HTML_UNITS
: ('%'|'in'|'cm'|'mm'|'em'|'ex'|'pt'|'pc'|'px')
;
fragment
BOOLEAN
: ('true'|'false')
;
fragment
BITFIELD
: SYMBOL (WS? BIT_OR WS? SYMBOL)*
;
SYMBOL
: LITERAL_U [0-9A-Z_]*
;
UTC_DATE
: (DIGIT DIGIT DIGIT DIGIT '-' DIGIT DIGIT '-' DIGIT DIGIT)
;
REGEX
: ('/' ('\\'.|.)*? '/' ('g'|'m'|'i')* )
;
CSS_CLASS
: ( LITERAL_L ('-' | '_' | LITERAL_L | [0-9])* )
;
WDG_ASSIGN_ERR : ERROR -> type(ERROR), popMode;
解析器语法文件:
parser grammar LayoutParser;
options
{
tokenVocab=LayoutLexer;
language=Java;
}
document : (element | TEXT_NODE | EXT_REF)* EOF;
element
locals
[
String currentTag
]
: ( ( html_open_tag (element | TEXT_NODE | EXT_REF)* html_close_tag )
| ( wdg_open_tag (element | TEXT_NODE | EXT_REF)* wdg_close_tag )
| ( html_empty_tag | wdg_empty_tag ) )
;
html_empty_tag
: TAG_START_OPEN (ATTRIBUTE EQUAL VAL)* TAG_EMPTY_CLOSE
;
html_open_tag
: ( tag=TAG_START_OPEN (ATTRIBUTE EQUAL VAL)* TAG_CLOSE )
{$element::currentTag = $tag.text.substring(1);}
;
html_close_tag
: tag=TAG_END_OPEN TAG_CLOSE
{
if (!$element::currentTag.equals($tag.text.substring(2)))
notifyErrorListeners("HTML tag mismatch '" + $element::currentTag + "' - '" + $tag.text.substring(2) + "'");
}
;
wdg_empty_tag
: WDG_START_OPEN EQUAL PHP_REF ( COMMA (wdg_prop | wdg_event) )* WDG_EMPTY_CLOSE
;
wdg_open_tag
: tag=WDG_START_OPEN EQUAL PHP_REF ( COMMA (wdg_prop | wdg_event) )* WDG_CLOSE
{$element::currentTag = $tag.text.substring(1);}
;
wdg_close_tag
: tag=WDG_END_OPEN WDG_CLOSE
{
if (!$element::currentTag.equals($tag.text.substring(2)))
notifyErrorListeners("Widget alias mismatch '" + $element::currentTag + "' - '" + $tag.text + "'");
}
;
wdg_prop
: PROPERTY (EQUAL (ARRAY | VALUE | PHP_REF | UTC_DATE | REGEX | CSS_CLASS))?
;
wdg_event
: EVENT_HANDLER EQUAL PHP_REF
;
【问题讨论】:
标签: java netbeans syntax-highlighting antlr4 lexer