【问题标题】:How do you parse multi-level "nodes" in text?你如何解析文本中的多级“节点”?
【发布时间】:2009-07-25 01:03:43
【问题描述】:

我有一个类似*.sln格式的配置格式,以如下为例:

DCOM Productions Configuration File, Format Version 1.0

BeginSection:Global
    GlobalKeyA = AnswerOne

    .: Stores the global configuration key
    :: for the application. This key is used
    :: to save the current state of the app.
    :: as well as prevent lockups
    GlobalKey3 = AnswerTwo

    .: Secondary Key. See above setting
    GlobalKeyC = AnswerThree

    BeginSection: UpdateSystem
        NestedKeyA = One
        NestedKeyB = Two
        NestedKeyC = { A set of multiline data
                      where we will show how
                      to write a multiline
                      paragraph }
        NestedKeyD = System.Int32, 100
    EndSection
EndSection

BeginSection:Application
    InstallPath = C:\Program Files\DCOM Productions\BitFlex
EndSection

我知道我需要一个递归函数,它可能将一段文本作为参数,例如,将整个部分传递给它,然后以这种方式递归解析它。

我似乎无法理解如何做到这一点。每个部分都可能有更多的子部分。它就像一个 Xml 文档。我在这里并不是真的要代码,只是关于如何解析这样的文档的方法。

我正在考虑使用制表符(缩进层级)来确定当前所处的部分,但如果文档没有正确缩进(格式不规范),这种方法就会失败。有更好的想法吗?

【问题讨论】:

    标签: c# parsing text


    【解决方案1】:

    也许您可以把这种格式与 XML 做类比,即 BeginSection 相当于开始标签,EndSection 相当于结束标签。

    将其视为具有许多根元素的 XML 文件。 BeginSection 和 EndSection 里面的内容将是您的内部 xml 节点,例如 NestedKeyA = 作为节点名称,“One”作为值。

    以 .: 开头的行看起来是注释,所以可以直接跳过。System.Int32, 100 可以作为节点的属性和值。

    { 一组多行数据 我们将展示如何 写多行 段落 } - 你也可以用算法来解析它。

    【讨论】:

    • 是的,Begin 和 EndSection 基本上是开始结束节点,但是我如何区分哪个 EndSection 属于哪个 BeginSection?我不能只抓住第一个,因为它可能是嵌套节点的 EndSection 而不是正在解析的第一个。
    • 编写一个解析器,解析一个BeginSection,如果在BeginSection中遇到一个BeginSection,则在新的子节开始时调用自身。将结果作为哈希 ref 传递出去,可以在调用函数中添加到哈希中
    • 好的,感谢您的洞察力。我想我现在知道该怎么做,如果我有任何其他问题弹出,我想我会回复。谢谢!
    【解决方案2】:

    好的,我做到了。

    /// <summary>
    /// Reads and parses xdf strings.
    /// </summary>
    /// <remarks>
    /// An xdf section is delimited by a <c>SectionName:Begin</c> tag and a matching
    /// <c>SectionName:End</c> tag, and may contain <c>KeyName = KeyValue</c> rows as well as
    /// nested sections. Tags and keys are located with unanchored regular expressions, so they are
    /// recognised wherever they occur in the text, including inside a key value.
    /// </remarks>
    public sealed class XdfReader {
        /// <summary>
        /// Instantiates a new instance of the DCOMProductions.BitFlex.IO.XdfReader class.
        /// </summary>
        public XdfReader() {
        }

        #region Constants

        /// <devdoc>
        /// This regular expression matches against a section beginning. A section may look like the following:
        ///
        ///     SectionName:Begin
        ///
        /// Where 'SectionName' is the name of the section, and ':Begin' represents that this is the
        /// opening tag for the section. This allows the parser to differentiate between open and
        /// close tags.
        /// </devdoc>
        private const String SectionBeginRegularExpression = @"[0-9a-zA-Z]*:Begin";

        /// <devdoc>
        /// This regular expression matches against a section ending. A section may look like the following:
        ///
        ///     SectionName:End
        ///
        /// Where 'SectionName' is the name of the section, and ':End' represents that this is the
        /// closing tag for the section. This allows the parser to differentiate between open and
        /// close tags.
        /// </devdoc>
        private const String SectionEndRegularExpression = @"[0-9a-zA-Z]*:End";

        /// <devdoc>
        /// This regular expression matches either kind of section tag and exposes the section name
        /// ('name' group) and the tag kind ('kind' group, "Begin" or "End"). It is used to pair an
        /// opening tag with its own closing tag while skipping over nested sections.
        /// </devdoc>
        private const String SectionTagRegularExpression = @"(?<name>[0-9a-zA-Z]*):(?<kind>Begin|End)";

        /// <devdoc>
        /// This regular expression matches against a key and its value. A key may look like the following:
        ///
        ///     KeyName=KeyValue
        ///     KeyName = KeyValue
        ///     KeyName =KeyValue
        ///     KeyName= KeyValue
        ///     KeyName    =       KeyValue
        ///
        /// And so on so forth. The spaces or tabs on either side of the assignment operator are
        /// optional and are not part of the 'name' or 'value' groups. The value stops at the first
        /// carriage return or line feed, so a key never spills over onto the following row.
        /// </devdoc>
        private const String KeyRegularExpression = @"(?<name>[0-9a-zA-Z]*)[ \t]*=[ \t]*(?<value>[^\r\n]*)";

        #endregion

        #region Fields

        // The patterns are parsed once and shared instead of being re-parsed on every call.
        private static readonly Regex SectionBeginRegex = new Regex(SectionBeginRegularExpression);
        private static readonly Regex SectionEndRegex = new Regex(SectionEndRegularExpression);
        private static readonly Regex SectionTagRegex = new Regex(SectionTagRegularExpression);
        private static readonly Regex KeyRegex = new Regex(KeyRegularExpression);

        // "\r\n" is listed first so that a Windows line break is consumed as a single separator.
        private static readonly String[] RowSeparators = new String[] { "\r\n", "\n" };

        #endregion

        #region Methods

        /// <summary>
        /// Not implemented.
        /// </summary>
        /// <exception cref="System.NotImplementedException">Always thrown.</exception>
        public void Flush() {
            throw new System.NotImplementedException();
        }

        /// <devdoc>
        /// Returns the name in front of the first opening tag found in the specified text.
        /// Throws an XdfException when the text contains no opening tag.
        /// </devdoc>
        private String GetSectionName(String xdf) {
            Match sectionMatch = SectionBeginRegex.Match(xdf);

            if (!sectionMatch.Success) {
                throw new BitFlex.IO.XdfException("The specified xdf did not contain a valid section.");
            }

            return sectionMatch.Value.Substring(0, sectionMatch.Value.IndexOf(':'));
        }

        /// <summary>
        /// Not implemented.
        /// </summary>
        /// <param name="fileName">Unused.</param>
        /// <exception cref="System.NotImplementedException">Always thrown.</exception>
        public XdfFile ReadFile(String fileName) {
            throw new System.NotImplementedException();
        }

        /// <summary>
        /// Parses the first key found in the specified text.
        /// </summary>
        /// <param name="xdf">Text containing a <c>KeyName = KeyValue</c> row.</param>
        /// <returns>The key, with the spaces and tabs around the assignment operator removed.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="xdf"/> is null.</exception>
        /// <exception cref="BitFlex.IO.XdfException">The text does not contain a key.</exception>
        public XdfKey ReadKey(String xdf) {
            if (xdf == null) {
                throw new ArgumentNullException("xdf");
            }

            Match keyMatch = KeyRegex.Match(xdf);

            if (!keyMatch.Success) {
                throw new BitFlex.IO.XdfException("The specified xdf did not contain a valid key.");
            }

            return CreateKey(keyMatch);
        }

        /// <summary>
        /// Parses a section, including all of its nested sections and keys.
        /// </summary>
        /// <param name="xdf">
        /// Text whose first non-blank row holds the opening tag of the section and whose last
        /// non-blank row holds a closing tag. Rows may be separated by CRLF or LF.
        /// </param>
        /// <returns>The parsed section with its child sections and its own keys.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="xdf"/> is null.</exception>
        /// <exception cref="BitFlex.IO.XdfException">
        /// The text is not a valid section, or a nested section has no matching closing tag.
        /// </exception>
        public XdfSection ReadSection(String xdf) {
            if (xdf == null) {
                throw new ArgumentNullException("xdf");
            }

            if (!ValidateSection(xdf)) {
                throw new BitFlex.IO.XdfException("The specified xdf did not contain a valid section.");
            }

            // Validation guarantees that the first opening tag in the text is the root's own tag.
            XdfSection rootSection = new XdfSection(GetSectionName(xdf));

            while (true) {
                // The first opening tag belongs to the root section; the one after it opens the
                // first child that has not been extracted yet.
                Match beginMatch = SectionBeginRegex.Match(xdf).NextMatch();

                if (!beginMatch.Success) {
                    break;
                }

                int length = FindSectionEnd(xdf, beginMatch) - beginMatch.Index;
                String sectionXdf = xdf.Substring(beginMatch.Index, length);

                // Cut the child out so that its keys are not attributed to the root section below.
                xdf = xdf.Remove(beginMatch.Index, length);

                rootSection.Sections.Add(ReadSection(sectionXdf));
            }

            // Only the root's own rows are left at this point.
            foreach (Match item in KeyRegex.Matches(xdf)) {
                rootSection.Keys.Add(CreateKey(item));
            }

            return rootSection;
        }

        /// <devdoc>
        /// Builds a key from a successful KeyRegularExpression match.
        /// </devdoc>
        private static XdfKey CreateKey(Match keyMatch) {
            XdfKey retVal = new XdfKey(keyMatch.Groups["name"].Value);
            retVal.Value = keyMatch.Groups["value"].Value;
            return retVal;
        }

        /// <devdoc>
        /// Returns the index just past the closing tag that pairs with the specified opening tag.
        /// Tags are counted from the opening tag onwards so that the closing tags of nested
        /// sections, or of sections whose name merely ends with the same characters, are not
        /// mistaken for the one being looked for. Throws an XdfException when the section is never
        /// closed or is closed by a tag carrying a different name.
        /// </devdoc>
        private int FindSectionEnd(String xdf, Match beginMatch) {
            String name = GetSectionName(beginMatch.Value);
            int depth = 1;

            for (Match tag = SectionTagRegex.Match(xdf, beginMatch.Index + beginMatch.Length); tag.Success; tag = tag.NextMatch()) {
                if (tag.Groups["kind"].Value == "Begin") {
                    depth++;
                    continue;
                }

                depth--;

                if (depth == 0) {
                    if (tag.Groups["name"].Value != name) {
                        // The enclosing section was closed before this one was.
                        break;
                    }

                    return tag.Index + tag.Length;
                }
            }

            throw new BitFlex.IO.XdfException(String.Format("There is a missing section ending at index {0}.", beginMatch.Index));
        }

        /// <devdoc>
        /// Checks that the first non-blank row of the text contains an opening tag and that the last
        /// non-blank row contains a closing tag. Blank rows before and after the section are ignored
        /// so that a trailing line break does not invalidate an otherwise valid section.
        /// </devdoc>
        private Boolean ValidateSection(String xdf) {
            String[] rows = xdf.Split(RowSeparators, StringSplitOptions.None);

            int first = 0;
            while (first < rows.Length && rows[first].Trim().Length == 0) {
                first++;
            }

            if (first == rows.Length) {
                // Nothing but blank rows.
                return false;
            }

            int last = rows.Length - 1;
            while (last > first && rows[last].Trim().Length == 0) {
                last--;
            }

            return SectionBeginRegex.IsMatch(rows[first]) && SectionEndRegex.IsMatch(rows[last]);
        }

        #endregion
    }
    

    }

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2016-03-05
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多