【问题标题】:How to use Rust nom to write a parser for this kind of structure text?如何使用 Rust nom 为这种结构文本编写解析器?
【发布时间】:2021-08-15 16:58:45
【问题描述】:

我有以下数据

    let data = r#"title1
title1 line1
title1 line2
sep/
title2
title2 line1
title2 line2
title2 line3
sep/
title3
title3 line1
sep/"#;

基本上它代表三个条目:

/// One record of the input: a title plus the body text that follows it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Entry {
    /// The title line, without its line ending.
    title: String,
    /// All lines after the title, up to (not including) the `sep/` separator line.
    body: String,
}

每个条目都有一个标题和一个正文。标题占一行(不包括行尾符),正文由其后的所有行组成,直到遇到分隔行(sep/)为止。我想要的结果是一个条目向量。如何使用 nom 来解析它?我刚接触 nom,无法把这些部分组合起来形成一个可用的解析器。以下是我目前的代码:


use nom::IResult;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until, is_not, is_a};
use nom::error::ErrorKind::ParseTo;
use nom::sequence::{pair, tuple, delimited, terminated};
use nom::combinator::opt;
use nom::error::{Error, ErrorKind};
use nom::character::complete::line_ending;
use nom::regexp::str::{re_find, re_match, re_matches, re_capture};
use nom::multi::many0;

/// One record of the input: a title plus the body text that follows it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Entry {
    /// The title line, without its line ending.
    title: String,
    /// All lines after the title, up to (not including) the `sep/` separator line.
    body: String,
}

fn get_entry_title(i: &str) -> IResult<&str, &str> {
    delimited(tag(""),
              take_until(alt((
                  tag("\r\n"),
                  tag("\n")
              ))),
              alt((
                  tag("\r\n"),
                  tag("\n")
              ))
    )(i)
}

fn get_entry_body(i: &str) -> IResult<&str, &str> {
    terminated(
        take_until( tag("sep/")),
        tag("sep/")
    )(i)
}

/// Entry point: runs the title parser once over the embedded sample text.
///
/// The parse result is bound but not inspected.
fn main() {
    // Sample text: three entries, each terminated by a `sep/` line.
    let input = r#"title1
title1 line1
title1 line2
sep/
title2
title2 line1
title2 line2
title2 line3
sep/
title3
title3 line1
sep/"#;

    let _parsed = get_entry_title(input);
}

【问题讨论】:

    标签: rust nom


    【解决方案1】:

    这是一种仅使用 nom 的方法(nom 6.1.2):

    use nom::IResult;
    use nom::branch::alt;
    use nom::bytes::complete::{tag, take_while};
    use nom::sequence::{terminated, delimited, pair};
    use nom::multi::{separated_list0, many1};
    
    /// One parsed entry: its title and the text assembled from its body lines.
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct Entry {
        /// The title line, without its line ending.
        title: String,
        /// Body text built from the entry's body lines.
        body: String,
    }
    
    /// Demo entry point: parses the embedded sample and pretty-prints the entries.
    ///
    /// Panics (via `unwrap`) if `parse` returns an error.
    fn main() {
        // Sample input: three entries, each closed by a `sep/` line.
        let input = r#"title1
    title1 line1
    title1 line2
    sep/
    title2
    title2 line1
    title2 line2
    title2 line3
    sep/
    title3
    title3 line1
    sep/"#;

        // Only the parsed entries are used; the unconsumed remainder is ignored.
        let parsed = parse(input).unwrap();
        let entries = parsed.1;
        println!("{:#?}", entries);
    }
    
    /// Parses zero or more entries, with a `separator` line between consecutive entries.
    ///
    /// Returns the unconsumed input together with the entries that were parsed.
    fn parse(input: &str) -> IResult<&str, Vec<Entry>> {
        let mut entries = separated_list0(separator, entry);
        entries(input)
    }
    
    /// Parses one entry: a title line followed by one or more body lines.
    ///
    /// Each body line must start with the entry's title and a single space
    /// (see `body_line`). That prefix and the line ending are stripped, and the
    /// remaining line contents are joined with `\n` to form the body.
    ///
    /// Returns the unconsumed input and the parsed `Entry`, or the error from
    /// the title parser / `many1` when the input does not match.
    fn entry(input: &str) -> IResult<&str, Entry> {
        let (input, title) = title(input)?;
        let (input, body_lines) = many1(body_line(title))(input)?;

        //TODO: Does it have to end with a separator ?
        // If it does, either use terminated() in combination with many(), or add
        // an additional check for separator here

        Ok((
            input,
            Entry {
                title: title.to_owned(),
                // Keep the line boundaries: joining with "" would fuse
                // "line1" and "line2" into "line1line2".
                body: body_lines.join("\n"),
            },
        ))
    }
    
    /// Parses a title line: any run of characters other than `\r` / `\n`
    /// (possibly empty), followed by a line ending that is consumed and dropped.
    fn title(input: &str) -> IResult<&str, &str> {
        let mut line = terminated(take_while(not_r_n), end_of_line);
        line(input)
    }
    
    /// Builds a parser for a single body line of the entry named `title`.
    ///
    /// The returned parser expects the line to start with `title` followed by
    /// one space, then yields the rest of the line (characters other than
    /// `\r` / `\n`). The prefix and the trailing line ending are consumed but
    /// not part of the output.
    ///
    /// `title` and the parser's input share the lifetime `'i`.
    pub fn body_line<'i>(title: &'i str) -> impl FnMut(&'i str) -> IResult<&'i str, &'i str, nom::error::Error<&'i str>>
    {
        // `move` so the closure owns its copy of the `title` reference.
        move |input: &str| {
            delimited(
                // Required prefix: "<title> ".
                pair(tag(title), tag(" ")),
                // The line contents that are returned.
                take_while(not_r_n),
                // The line must be terminated by `\n` or `\r\n`.
                end_of_line,
            )(input)
        }
    }
    
    /// Parses a separator line: the literal `sep/` followed by a line ending.
    ///
    /// Yields the matched `sep/` text; the line ending is consumed but dropped.
    fn separator(input: &str) -> IResult<&str, &str> {
        // The separator text is hardcoded; making it configurable would need
        // the same closure-returning shape as body_line().
        let marker = tag("sep/");
        terminated(marker, end_of_line)(input)
    }
    
    /// Matches a single line ending, either `\n` or `\r\n`, and returns it.
    fn end_of_line(input: &str) -> IResult<&str, &str> {
        let unix = tag("\n");
        let windows = tag("\r\n");
        alt((unix, windows))(input)
    }
    
    /// Returns `true` for every character except carriage return and line feed.
    fn not_r_n(ch: char) -> bool {
        !matches!(ch, '\r' | '\n')
    }
    

    【讨论】:

      猜你喜欢
      • 2021-10-12
      • 2020-09-10
      • 1970-01-01
      • 2021-05-04
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多