DotParser.scala:

package scaladot;
 
import scala.util.parsing.combinator._
import scala.util.parsing.combinator.syntactical._
import scala.util.parsing.combinator.lexical._
 
/**
 * A parser for the GraphViz "dot" language. This code is in the public domain.
 *
 * Grammar productions are written as lazy vals; `dot` is the rule for a whole
 * graph. Tokens come from a DotLexer instance, and the results are built from
 * the AST case classes Graph, Subgraph, Node, Edge, Attr, AttrList and Port.
 *
 *  @author Ross Judson
 */
class DotParser extends StdTokenParsers with ImplicitConversions {
  // Fill in abstract defs: the token type and the lexer that produces tokens.
  type Tokens = DotLexer
  val lexical = new Tokens
 
  // Configure lexical parsing: the reserved words are turned into keywords by
  // DotLexer.checkKeyword; the remaining symbols are registered as delimiters.
  // NOTE(review): the double quote is registered as a delimiter here although
  // it is also the quote character of DotLexer.string -- confirm this is
  // intended ("--" is registered too, but no rule in this class refers to it).
  lexical.reserved ++= List("strict", "graph", "digraph", "node", "edge", "subgraph")
  lexical.delimiters ++= List("{", "}", "[", "]", ":", "=", ";", ",", "->", "--","\"")
 
  // Bring the lexer's members (StringLit, Identifier, ...) into scope.
  import lexical._
  
  /** Implicit conversion that maps an absent identifier (None) to the empty
   *  string and Some(s) to s, so an optional ID can be used where a plain
   *  String is expected.
   */
  implicit def emptyIdentifier(n: Option[String]) = n match {
    case Some(id) => id
    case _ => ""
  }
 
  /** Implicit conversion from a list of sequence results (`a ~ b`) to the list
   *  of their first components. The type of the second component is
   *  existential because it is discarded; for example, the optional ';'
   *  parsed after each statement in stmt_list is dropped this way.
   */
  implicit def convertList[B](lst: List[~[B,_]]) = lst.map(_._1)
  
  // graph : [ strict ] (graph | digraph) [ ID ] '{' stmt_list '}'
  // Rule for a whole graph. The kind component is false for "graph" and true
  // for "digraph"; the pieces are packaged into a Graph.
  lazy val dot = opt("strict") ~ ("graph" ^^ false | "digraph" ^^ true) ~ opt(ID) ~ "{" ~ stmt_list ~ "}" ^^
    { case str ~ typ ~ id ~ statements => Graph(str, typ, id, statements:_*) }
    
  // stmt_list : [ stmt [ ';' ] [ stmt_list ] ]
  // Zero or more statements, each optionally followed by ';'.
  lazy val stmt_list = rep(stmt ~ opt(";")) 
  
  // stmt : node_stmt
  //      | edge_stmt
  //      | attr_stmt
  //      | ID '=' ID
  //      | subgraph
  // The alternatives are listed in a different order than in the grammar:
  // node_stmt comes last, presumably so that the longer forms that also start
  // with an ID (attr_set, edge_stmt) are attempted before it.
  lazy val stmt: Parser[Statement] = subgraph | attr_set | edge_stmt | attr_stmt | node_stmt
  // ID '=' value as a statement of its own; a_value supplies the pair
  // (quoted flag, text) that is stored in the resulting Attr.
  lazy val attr_set = ID ~ "=" ~ a_value ^^ 
    { case left ~ Pair(q,v) => Attr(left, Some(v), q) }
  
  // attr_stmt : (graph | node | edge) attr_list
  // Default attributes for a kind of component, packaged as an AttrList.
  lazy val  attr_stmt = attr_list_type ~ attr_list ^^
    { case at ~ al => AttrList(at, al:_*) }
  
  // The keyword introducing an attr_stmt, returned as its own text.
  lazy val attr_list_type = 
    "graph" ^^ "graph" |
    "node" ^^ "node" |
    "edge" ^^ "edge"
    
  // attr_list : '[' [ a_list ] ']' [ attr_list ]
  // Any number of bracketed a_lists; their contents are flattened into one
  // list of Attr.
  lazy val  attr_list = (("[" ~ a_list ~ "]")*) ^^ 
    { case lists => lists.flatMap(l => l) }
  
  // a_list : ID [ '=' ID ] [ ',' ] [ a_list ]
  // NOTE(review): the grammar above makes ',' optional, whereas here it is
  // given as the separator between a_parts -- confirm whether attributes
  // separated only by whitespace are meant to parse.
  lazy val  a_list = a_part * ","
  // One attribute: a name, optionally followed by '=' and a value. Without a
  // value the Attr carries None and quoted = false. The rule is wrapped in
  // log(...) under the name "a_part" (presumably for parse tracing).
  lazy val  a_part = log( 
    (ID ~ opt("=" ~ a_value) ^^ { 
      case n ~ Some((q,v)) => Attr(n,Some(v),q)
      case n ~ None => Attr(n, None, false)
      } ))("a_part")
  
  // An attribute value as the pair (quoted, text): quoted is true when the
  // value came from a string literal token and false when it came from ID.
  lazy val a_value = 
    accept("string", { case StringLit(v) => (true,v)}) |
    (ID ^^ { case v => (false,v) })
    
  // A string literal token, yielding its text.
  // NOTE(review): not referenced by any other rule in this class.
  lazy val a_string = log(accept("string", { case StringLit(v) => v }))("a_string")      
  
  // edge_stmt : (node_id | subgraph) edgeRHS [ attr_list ]
  // Only the "->" edge operator is used here, and every Edge is created with
  // the placeholder id "?".
  // NOTE(review): "--" is declared as a delimiter but never used, so
  // undirected edges are presumably not parsed -- confirm.
  lazy val  edge_stmt = (node_id | subgraph) ~ "->" ~ rep1sep(node_id | subgraph, "->") ~ attr_list ^^
    { case head ~ rest ~ attrs => Edge("?", attrs, (head :: rest):_*) }
  
  // node_stmt : node_id [ attr_list ]
  // Rebuilds the Node produced by node_id with the parsed attributes added.
  lazy val  node_stmt = node_id ~ attr_list ^^ 
    { case Node(n,p) ~ a => Node(n,p,a:_*) } 
  
  // node_id : ID [ port ]
  // A Node with its optional port and no attributes.
  lazy val  node_id = ID ~ opt(port) ^^
    { case n ~ p => Node(n, p) }
   
  // port : ':' ID [ ':' compass_pt ]
  //      | ':' compass_pt
  // Both the port name and the compass point are read with ID; compass_pt is
  // not used here.
  // NOTE(review): the second alternative (a bare ID) looks unreachable,
  // because the first one already accepts an ID with nothing after it.
  lazy val  port = ":" ~ 
    ((ID ~ opt(":" ~ ID)) ^^ flatten2(Port) | 
     ID ^^ { Port (_, None) } ) 
  
  // subgraph : [ subgraph [ ID ] ] '{' stmt_list '}'
  // Unlike the grammar above, this rule always starts with the "subgraph"
  // keyword; only the ID after it is optional.
  lazy val  subgraph = "subgraph" ~ opt(ID) ~ "{" ~ stmt_list ~ "}" ^^ 
    { case n ~ s => Subgraph(n, s:_*) }
    
  // compass_pt : (n | ne | e | se | s | sw | w | nw)
  // NOTE(review): not referenced by any other rule in this class, and these
  // words are not in lexical.reserved, so checkKeyword turns them into
  // Identifier tokens rather than Keyword tokens.
  lazy val compass_pt = 
    "n" ^^ "n"   | 
    "ne" ^^ "ne" | 
    "e" ^^ "e"   | 
    "se" ^^ "se" | 
    "s" ^^ "s"   | 
    "sw" ^^ "sw" | 
    "w" ^^ "w"   | 
    "nw" ^^ "nw"
  
  // An ID is either a string literal token or an identifier token; in both
  // cases the token's text is the result.
  lazy val ID = IDs | IDi
  lazy val IDs = accept("string", { case StringLit(n) => n })
  lazy val IDi = accept("identifier", { case Identifier(n) => n})
  
}

DotLexer.scala:

package scaladot;
 
import scala.util.parsing.combinator._
import scala.util.parsing.combinator.syntactical._
import scala.util.parsing.combinator.lexical._
import scala.util.parsing.input.CharArrayReader.EofCh
 
/** Lexer for the DOT language. It replaces StdLexical's token and whitespace
 *  rules with its own rules for quoted strings, numbers and identifiers.
 */
class DotLexer extends StdLexical with ImplicitConversions {
 
    /** Produces the next token. The alternatives are, in the order listed:
     *  a quoted string; a number immediately followed by a letter (reported
     *  as an ErrorToken); a negative number, with optional whitespace after
     *  the minus sign; a plain number; end of input; a delimiter; a lone
     *  double quote (reported as an unterminated string); an identifier or
     *  keyword; and finally a failure for anything else.
     */
    override def token: Parser[Token] =
      ( string ^^ StringLit
      | number ~ letter ^^ { case n ~ l => ErrorToken("Invalid number format : " + n + l) }
      | '-' ~ whitespace ~ number ~ letter ^^ { case ws ~ num ~ l => ErrorToken("Invalid number format : -" + num + l) }
      | '-' ~ whitespace ~ number ^^ { case ws ~ num => NumericLit("-" + num) }
      | number ^^ NumericLit
      | EofCh ^^ EOF
      | delim
      | '\"' ~ failure("Unterminated string")
      | id ^^ checkKeyword
      | failure("Illegal character")
      )
 
    // def idcont = letter | digit | underscore
    /** One or more letters, digits or underscores, joined into a string.
     *
     *  rep1 is used rather than rep: with rep this rule also succeeded on
     *  zero characters, so the "Illegal character" alternative in token could
     *  never be reached and an unexpected character produced an empty
     *  identifier without consuming any input.
     */
    def id = rep1(letter | digit | elem("underscore", _=='_')) ^^ { _ mkString "" } 
      
    // def underscore: Parser[String] = elem('_')
      
    /** Classifies a word: a Keyword if it is in `reserved`, otherwise an
     *  Identifier.
     */
    def checkKeyword(strRep: String) = {
      if (reserved contains strRep) Keyword(strRep) else Identifier(strRep)
    }
 
    /** A string is a collection of zero or more Unicode characters, wrapped in
     *  double quotes, using backslash escapes (cf. http://www.json.org/).
     *  A raw double quote, newline or end of input is not accepted inside it.
     */
    def string = '\"' ~ rep(charSeq | chrExcept('\"', '\n', EofCh)) ~ '\"' ^^ { _ mkString "" }
 
    /** Whitespace is any run of whitespace characters.
     *  NOTE(review): no comment syntax is handled by this rule -- confirm
     *  that DOT comments are intentionally unsupported.
     */
    override def whitespace = rep(whitespaceChar)
 
    /** A number: integer part, optional fraction, optional exponent,
     *  reassembled into its textual form.
     */
    def number = intPart ~ opt(fracPart) ~ opt(expPart) ^^ { case i ~ f ~ e =>
      i + optString(".", f) + optString("", e)
    }
    // Either a single "0" or a nonzero digit followed by further digits.
    def intPart = zero | intList
    def intList = nonzero ~ rep(digit) ^^ {case x ~ y => (x :: y) mkString ""}
    // The digits after a '.'; number puts the "." back via optString.
    def fracPart = '.' ~ rep(digit) ^^ { _ mkString "" }
    // Exponent marker, optional sign, and at least one digit.
    def expPart = exponent ~ opt(sign) ~ rep1(digit) ^^ { case e ~ s ~ d =>
      e + optString("", s) + d.mkString("")
    }
 
    /** Renders an optional value as `pre` followed by the value, or as the
     *  empty string when the value is absent.
     */
    private def optString[A](pre: String, a: Option[A]) = a match {
      case Some(x) => pre + x.toString
      case None => ""
    }
 
    def zero: Parser[String] = '0' ^^ "0"
    def nonzero = elem("nonzero digit", d => d.isDigit && d != '0')
    def exponent = elem("exponent character", d => d == 'e' || d == 'E')
    def sign = elem("sign character", d => d == '-' || d == '+')
 
    /** A backslash escape inside a string literal, yielding the text it
     *  stands for.
     */
    def charSeq: Parser[String] =
      ('\\' ~ '\"' ^^ "\""
      |'\\' ~ '\\' ^^ "\\"
      |'\\' ~ '/'  ^^ "/"
      |'\\' ~ 'b'  ^^ "\b"
      |'\\' ~ 'f'  ^^ "\f"
      |'\\' ~ 'n'  ^^ "\n"
      |'\\' ~ 'r'  ^^ "\r"
      |'\\' ~ 't'  ^^ "\t"
      |'\\' ~ 'u' ~ unicodeBlock)
 
    val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray
    def hexDigit = elem("hex digit", hexDigits.contains(_))
 
    /** The four hex digits of a \\uXXXX escape, yielding the corresponding
     *  character as a one-character string.
     *
     *  The value is converted straight to a Char. Previously it was encoded
     *  to UTF-8 bytes and decoded again with `new String(bytes)`, which uses
     *  the platform default charset and therefore gave wrong characters on
     *  platforms whose default is not UTF-8.
     */
    private def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ {
      case a ~ b ~ c ~ d =>
        Integer.parseInt(List(a, b, c, d) mkString "", 16).toChar.toString
    }
 
    //private def lift[T](f: String => T)(xs: List[Any]): T = f(xs mkString "")
}
 

And finally, an example of usage, together with the AST for DOT:

package scaladot;
 
object Dot extends DotParser {

  /** Parses `input` as one complete DOT document.
   *
   *  On success prints "Success!" and returns the parsed graph wrapped in
   *  Some; otherwise prints the unsuccessful parse result and returns None.
   */
  def parse(input: String) = {
    val outcome = phrase(dot)(new lexical.Scanner(input))
    outcome match {
      case Success(graph, _) =>
        println("Success!")
        Some(graph)
      case other =>
        println(other)
        None
    }
  }

  /** Demo entry point: parses a small sample digraph and prints the result. */
  def main(args: Array[String]): Unit = {
    val sample = """
      digraph acm {
        hello -> world;
        test:up:n -> world;
        style = filled;
        
        subgraph cluster {
          node [style=filled,color=white];
          toast -> bingo;
          zot -> bingo;
          zot -> test;
          style=filled;
          color=lightgrey;
          label = "Below";
        }        
 
    }
    
    """

    val x = parse(sample)
    println(x)
  }
}
 
/** Base class of every DOT AST element. It knows how to render the element
 *  back into DOT text.
 */
abstract class DotComponent {
  /** Renders this component as DOT text, starting at indentation level 0. */
  override def toString: String = {
    val b = new StringBuilder
    buildString(0, b)
    b.toString()
  }

  /** Appends the indentation for `level` to `b`.
   *  NOTE(review): the inclusive range appends level + 1 spaces, so level 0
   *  still yields one space; kept as is so the existing output does not change.
   */
  private def indent(level: Int, b: StringBuilder): Unit = {
    for (i <- 0 to level) b append ' '
  }

  /** Appends the DOT text of this component to `b`.
   *
   *  Only the known AST case classes are handled; any other subclass falls
   *  through the match.
   *
   *  @param level current indentation level
   *  @param b     builder receiving the output
   */
  def buildString(implicit level: Int, b: StringBuilder): Unit = {

    // Renders `things` at level `lev`, writing `sep` between neighbours.
    def between(sep: String, things: Seq[DotComponent])(implicit lev: Int): Unit = {
      var first = true
      for (t <- things) {
        if (first) first = false else b append sep
        t.buildString(lev, b)
      }
    }

    // Like `between`, wrapped in `before`/`after`; an empty sequence produces
    // no output at all, so empty attribute lists are simply omitted.
    def betweenList(before: String, sep: String, after: String, things: Seq[DotComponent])(implicit lev: Int): Unit = {
      if (!things.isEmpty) {
        b append before
        between(sep, things)(lev)
        b append after
      }
    }

    this match {
    case Port(id, compass) =>
      b append id
      compass.foreach(c => b append ':' append c)
    case Graph(strict, digraph, id, statements @ _*) =>
      indent(level, b)
      if (strict) b append "strict "
      b append (if (digraph) "digraph " else "graph ")
      // The id and braces are written unconditionally: previously a graph
      // without statements lost its id and both braces.
      b append id append " {\n"
      between("\n", statements)(level + 1)
      b append "}\n"
    case AttrList(kind, attrs @ _*) =>
      indent(level, b)
      b append kind
      betweenList(" [", ",", "]", attrs)(0)
    case Attr(n, Some(v), q) =>
      indent(level, b)
      b append n append '='
      if (q) b append '"'
      b append v
      if (q) b append '"'
    case Attr(n, _, _) => b append n
    case Edge(_, attrs, nodes @ _*) =>
      indent(level, b)
      between(" -> ", nodes)(level)
      betweenList(" [", ",", "]", attrs)(0)
    case Subgraph(id, statements @ _*) =>
      indent(level, b)
      // The opening brace is written unconditionally: previously a subgraph
      // without statements got a closing brace but no opening one.
      b append "subgraph " append id append " {\n"
      if (!statements.isEmpty) {
        between("\n", statements)(level + 1)
        b append "\n"
      }
      indent(level, b)
      b append "}\n"
    case Node(id, port, attrs @ _*) =>
      b append id
      port.foreach { p => b append ':'; p.buildString(level, b) }
      betweenList(" [", ", ", " ]\n", attrs)(level)
    }
  }

}
 
/** Implemented by DOT components that are allowed to have an identity. */
trait Identified extends DotComponent {
  /** The identifier of this component. */
  val id: String
}
 
/** Marker trait for DOT components that are statements, i.e. that may appear
 *  in the statement list of a graph or subgraph.
 */
trait Statement extends DotComponent 
 
/** Implemented by DOT components that have a list of attributes associated
 *  with them.
 */
trait Attributed extends DotComponent {
  /** The attributes attached to this component, in order. */
  val attrs: Seq[Attr]
}
 
/** Implemented by DOT components that can participate in an edge; currently
 *  Node and Subgraph.
 */
trait EdgeComponent extends Identified
 
/** The abstract base for the two graph components of DOT -- graph (digraph)
 *  and subgraph.
 */
abstract class AbstractGraph extends DotComponent with Identified {
  /** The statements contained in the graph body, in order. */
  val statements: Seq[Statement]
}
/** Nodes can have an optional port identifier. The port identifier can have
 *  an optional compass direction.
 *
 *  @param id      the port identifier
 *  @param compass the compass direction, if one was given
 */
case class Port(id: String, compass: Option[String]) extends DotComponent 
 
/** A top-level graph.
 *
 *  @param strict     whether the graph is rendered with the "strict" prefix
 *  @param digraph    true renders as "digraph", false as "graph"
 *  @param id         the graph identifier
 *  @param statements the statements of the graph body, in order
 */
case class Graph(strict: Boolean, digraph: Boolean, id: String, statements: Statement*) extends AbstractGraph
 
//
// Statements
//

/** An attribute statement: `kind` followed by a bracketed attribute list
 *  when `attrs` is non-empty.
 */
case class AttrList(kind: String, attrs: Attr*) extends Statement with Attributed

/** A single attribute. `value` is None for a bare name; `quoted` controls
 *  whether the value is rendered inside double quotes.
 */
case class Attr(name: String, value: Option[String], quoted: Boolean) extends Statement

/** An edge statement connecting `nodes` in order, with optional attributes.
 *  The id is ignored when the edge is rendered.
 */
case class Edge(id: String, attrs: Seq[Attr], nodes: EdgeComponent*) extends Identified with Statement with Attributed

/** A subgraph; usable both as a statement and as an endpoint of an edge. */
case class Subgraph(id: String, statements: Statement*) extends AbstractGraph with EdgeComponent with Statement

/** A node with an optional port and optional attributes; usable both as a
 *  statement and as an endpoint of an edge.
 */
case class Node(id: String, port: Option[Port], attrs: Attr*) extends EdgeComponent with Statement with Attributed
 


 


相关文章: