flowchart.go

// Package parser provides syntax analysis for Mermaid diagrams.
// Based on the grammar rules from flow.jison in mermaid.js
package parser

import (
	"fmt"

	"mermaid-go/pkg/ast"
	"mermaid-go/pkg/lexer"
)

// Parser implements recursive descent parsing for Mermaid flowcharts,
// following the grammar structure from flow.jison.
type Parser struct {
	tokens  []lexer.Token
	current int
	flowDB  *FlowDB
}

// FlowDB manages the state during parsing, mirroring mermaid.js FlowDB.
type FlowDB struct {
	vertexCounter      int
	vertices           map[string]*ast.FlowVertex
	edges              []*ast.FlowEdge
	classes            map[string]*ast.FlowClass
	subGraphs          []*ast.FlowSubGraph
	subGraphLookup     map[string]*ast.FlowSubGraph
	tooltips           map[string]string
	direction          string
	version            string
	defaultStyle       []string
	defaultInterpolate string
}

// NewFlowDB creates a new flow database.
func NewFlowDB() *FlowDB {
	return &FlowDB{
		vertices:       make(map[string]*ast.FlowVertex),
		edges:          make([]*ast.FlowEdge, 0),
		classes:        make(map[string]*ast.FlowClass),
		subGraphs:      make([]*ast.FlowSubGraph, 0),
		subGraphLookup: make(map[string]*ast.FlowSubGraph),
		tooltips:       make(map[string]string),
		version:        "gen-2",
	}
}

// NewParser creates a new parser.
func NewParser() *Parser {
	return &Parser{
		flowDB: NewFlowDB(),
	}
}

// NewFlowchartParser creates a new flowchart parser (alias for NewParser).
func NewFlowchartParser() *Parser {
	return NewParser()
}

// Parse parses the input string and returns a flowchart diagram.
func (p *Parser) Parse(input string) (ast.Diagram, error) {
	// Tokenize
	l := lexer.NewLexer(input)
	tokens, err := l.Tokenize()
	if err != nil {
		return nil, fmt.Errorf("lexical analysis failed: %w", err)
	}
	// Filter out whitespace and comments
	p.tokens = lexer.FilterTokens(tokens)
	p.current = 0
	// Reset parser state
	p.flowDB = NewFlowDB()
	// Parse according to the grammar
	err = p.parseDocument()
	if err != nil {
		return nil, fmt.Errorf("syntax analysis failed: %w", err)
	}
	// Build the final flowchart
	return p.buildFlowchart(), nil
}
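
// A minimal usage sketch (the type assertion follows from the code above,
// since Parse returns the *ast.Flowchart built by buildFlowchart):
//
//	p := parser.NewParser()
//	diagram, err := p.Parse("graph TD\nA[Start] --> B{Done}")
//	if err != nil {
//		// handle the parse error
//	}
//	flow := diagram.(*ast.Flowchart)
//	_ = flow // inspect flow.Direction, flow.Vertices, flow.Edges, ...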

// parseDocument implements the top-level grammar rule:
// document: graphStatement | document graphStatement
func (p *Parser) parseDocument() error {
	for !p.isAtEnd() {
		if err := p.parseStatement(); err != nil {
			return err
		}
	}
	return nil
}

// parseStatement parses individual statements.
func (p *Parser) parseStatement() error {
	if p.isAtEnd() {
		return nil
	}
	token := p.peek()
	switch token.Type {
	case lexer.TokenGraph:
		return p.parseGraphStatement()
	case lexer.TokenSubgraph:
		return p.parseSubgraphStatement()
	case lexer.TokenClass:
		return p.parseClassStatement()
	case lexer.TokenClassDef:
		return p.parseClassDefStatement()
	case lexer.TokenStyle:
		return p.parseStyleStatement()
	case lexer.TokenLinkStyle:
		return p.parseLinkStyleStatement()
	case lexer.TokenClick:
		return p.parseClickStatement()
	case lexer.TokenNewline:
		p.advance() // skip newlines
		return nil
	case lexer.TokenEOF:
		return nil
	default:
		// Try to parse as an edge statement
		return p.parseEdgeStatement()
	}
}
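
// Dispatch examples (statement syntax as in Mermaid; whether the lexer
// tokenizes each keyword this way is assumed, not verified here):
// "graph TD" goes to parseGraphStatement, "subgraph one" to
// parseSubgraphStatement, "classDef c fill:#f9f" to parseClassDefStatement,
// and anything else, such as "A --> B", is tried as an edge statement.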

// parseGraphStatement: GRAPH dir? (NL graphStatementList)?
func (p *Parser) parseGraphStatement() error {
	if !p.check(lexer.TokenGraph) {
		return p.error("expected 'graph'")
	}
	p.advance()
	// Optional direction
	if p.checkDirection() {
		dir := p.advance()
		p.flowDB.direction = dir.Value
	}
	// Optional newline
	if p.check(lexer.TokenNewline) {
		p.advance()
	}
	return nil
}
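
// For example, "graph TD" consumes the GRAPH token and stores "TD" as the
// diagram direction; a bare "graph" header leaves the direction empty.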

// parseSubgraphStatement handles subgraph definitions.
func (p *Parser) parseSubgraphStatement() error {
	if !p.check(lexer.TokenSubgraph) {
		return p.error("expected 'subgraph'")
	}
	p.advance()
	// TODO: Implement subgraph parsing based on flow.jison
	// For now, skip to the matching 'end'
	for !p.check(lexer.TokenEnd) && !p.isAtEnd() {
		p.advance()
	}
	if p.check(lexer.TokenEnd) {
		p.advance()
	}
	return nil
}

// parseClassStatement handles class assignments.
func (p *Parser) parseClassStatement() error {
	if !p.check(lexer.TokenClass) {
		return p.error("expected 'class'")
	}
	p.advance()
	// Skip implementation for now
	return p.skipToNextStatement()
}

// parseClassDefStatement handles class definitions.
func (p *Parser) parseClassDefStatement() error {
	if !p.check(lexer.TokenClassDef) {
		return p.error("expected 'classDef'")
	}
	p.advance()
	// Skip implementation for now
	return p.skipToNextStatement()
}

// parseStyleStatement handles style definitions.
func (p *Parser) parseStyleStatement() error {
	if !p.check(lexer.TokenStyle) {
		return p.error("expected 'style'")
	}
	p.advance()
	// Skip implementation for now
	return p.skipToNextStatement()
}

// parseLinkStyleStatement handles link style definitions.
func (p *Parser) parseLinkStyleStatement() error {
	if !p.check(lexer.TokenLinkStyle) {
		return p.error("expected 'linkStyle'")
	}
	p.advance()
	// Skip implementation for now
	return p.skipToNextStatement()
}

// parseClickStatement handles click event definitions.
func (p *Parser) parseClickStatement() error {
	if !p.check(lexer.TokenClick) {
		return p.error("expected 'click'")
	}
	p.advance()
	// Skip implementation for now
	return p.skipToNextStatement()
}

// parseEdgeStatement parses edge definitions.
// This is the core parsing logic for flowchart connections.
func (p *Parser) parseEdgeStatement() error {
	// Parse start vertex
	startVertex, err := p.parseVertex()
	if err != nil {
		return err
	}
	// A standalone vertex statement (e.g. "A[Start]") has no edge after it;
	// register the vertex and stop here.
	if p.check(lexer.TokenNewline) || p.isAtEnd() {
		p.addVertex(startVertex.ID, startVertex.Text, startVertex.Shape)
		return nil
	}
	// Parse edge
	edge, err := p.parseEdge()
	if err != nil {
		return err
	}
	// Parse end vertex
	endVertex, err := p.parseVertex()
	if err != nil {
		return err
	}
	// Create the edge in flowDB
	return p.addEdge(startVertex, endVertex, edge)
}
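
// For input such as "A[Start] --> B{Done}", parseVertex registers A as a
// rect and B as a diamond, parseEdge yields a normal point arrow, and
// addEdge links the two.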

// parseVertex parses vertex definitions with shapes.
// Examples: A[Text], B(Text), C{Text}, etc.
func (p *Parser) parseVertex() (*VertexInfo, error) {
	if !p.check(lexer.TokenID) {
		return nil, p.error("expected vertex identifier")
	}
	id := p.advance().Value
	vertex := &VertexInfo{ID: id}
	// Check for a shape definition
	if p.checkShapeStart() {
		shape, text, err := p.parseShape()
		if err != nil {
			return nil, err
		}
		vertex.Shape = shape
		vertex.Text = text
		// Add the vertex to flowDB
		p.addVertex(id, text, shape)
	}
	return vertex, nil
}
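
// For example, "A[Start]" yields VertexInfo{ID: "A", Text: "Start"} with a
// rect shape and registers the vertex immediately; a bare "A" yields only
// the ID and leaves registration to addEdge.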

// VertexInfo holds parsed vertex information.
type VertexInfo struct {
	ID    string
	Text  string
	Shape ast.FlowVertexTypeParam
}

// EdgeInfo holds parsed edge information.
type EdgeInfo struct {
	Type   string
	Text   string
	Length int
	Stroke ast.FlowEdgeStroke
}

// parseShape parses shape definitions [text], (text), {text}, etc.
func (p *Parser) parseShape() (ast.FlowVertexTypeParam, string, error) {
	startToken := p.peek()
	var shape ast.FlowVertexTypeParam
	var endToken lexer.TokenType
	consumedOpen := false
	switch startToken.Type {
	case lexer.TokenOpenBracket:
		shape = ast.VertexTypeRect
		endToken = lexer.TokenCloseBracket
	case lexer.TokenOpenParen:
		if p.checkNext(lexer.TokenOpenParen) { // ((text))
			shape = ast.VertexTypeCircle
			p.advance() // skip first (
			p.advance() // skip second (
			consumedOpen = true
			endToken = lexer.TokenCloseParen
		} else { // (text)
			shape = ast.VertexTypeRound
			endToken = lexer.TokenCloseParen
		}
	case lexer.TokenOpenBrace:
		shape = ast.VertexTypeDiamond
		endToken = lexer.TokenCloseBrace
	case lexer.TokenOpenDoubleParen:
		shape = ast.VertexTypeCircle
		endToken = lexer.TokenCloseDoubleParen
	default:
		return "", "", p.error("expected shape delimiter")
	}
	if !consumedOpen {
		p.advance() // consume opening delimiter
	}
	// Parse text content
	text := ""
	for !p.check(endToken) && !p.isAtEnd() {
		if p.check(lexer.TokenString) {
			// Remove the surrounding quotes from string literals
			val := p.advance().Value
			if len(val) >= 2 {
				val = val[1 : len(val)-1]
			}
			text += val
		} else {
			text += p.advance().Value
		}
	}
	if !p.check(endToken) {
		return "", "", p.error("expected closing delimiter")
	}
	p.advance() // consume closing delimiter
	// A circle opened with two single parens needs a second closing paren
	if shape == ast.VertexTypeCircle && endToken == lexer.TokenCloseParen {
		if !p.check(lexer.TokenCloseParen) {
			return "", "", p.error("expected second closing parenthesis")
		}
		p.advance()
	}
	return shape, text, nil
}
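
// Delimiter-to-shape summary for the cases handled above:
//
//	[text]   -> rect
//	(text)   -> round
//	((text)) -> circle
//	{text}   -> diamond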

// parseEdge parses edge definitions with arrows and labels.
func (p *Parser) parseEdge() (*EdgeInfo, error) {
	edge := &EdgeInfo{
		Stroke: ast.StrokeNormal,
		Length: 1,
	}
	// Parse the arrow type
	if !p.checkArrow() {
		return nil, p.error("expected arrow")
	}
	arrow := p.advance()
	edge.Type, edge.Stroke = p.parseArrowType(arrow.Value)
	// Parse the optional edge label following the arrow, as in A -->|text| B
	if p.check(lexer.TokenPipe) {
		p.advance() // consume |
		// Collect text until the next |
		text := ""
		for !p.check(lexer.TokenPipe) && !p.isAtEnd() {
			text += p.advance().Value
		}
		if !p.check(lexer.TokenPipe) {
			return nil, p.error("expected closing pipe for edge label")
		}
		p.advance() // consume closing |
		edge.Text = text
	}
	return edge, nil
}

// parseArrowType extracts the type and stroke from an arrow token.
func (p *Parser) parseArrowType(arrow string) (string, ast.FlowEdgeStroke) {
	switch arrow {
	case "-->":
		return "arrow_point", ast.StrokeNormal
	case "-.->":
		return "arrow_point", ast.StrokeDotted
	case "==>":
		return "arrow_point", ast.StrokeThick
	case "--x":
		return "arrow_cross", ast.StrokeNormal
	case "--o":
		return "arrow_circle", ast.StrokeNormal
	case "---":
		return "arrow_open", ast.StrokeNormal
	default:
		return "arrow_point", ast.StrokeNormal
	}
}
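
// For example, "-.->" yields ("arrow_point", StrokeDotted) and "--o" yields
// ("arrow_circle", StrokeNormal); unrecognized arrows fall back to a normal
// point arrow.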

// FlowDB manipulation methods (mirroring mermaid.js FlowDB)

// addVertex adds a vertex to the flow database.
func (p *Parser) addVertex(id, text string, vertexType ast.FlowVertexTypeParam) {
	vertex := p.flowDB.vertices[id]
	if vertex == nil {
		vertex = &ast.FlowVertex{
			ID:        id,
			LabelType: "text",
			DomID:     fmt.Sprintf("flowchart-%s-%d", id, p.flowDB.vertexCounter),
			Styles:    make([]string, 0),
			Classes:   make([]string, 0),
		}
		p.flowDB.vertices[id] = vertex
		p.flowDB.vertexCounter++
	}
	if text != "" {
		vertex.Text = &text
	}
	if vertexType != "" {
		vertex.Type = &vertexType
	}
}
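
// For example, the first vertex registered (say "A") gets DomID
// "flowchart-A-0"; re-registering an existing ID only updates its text
// and type, never the counter.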

// addEdge adds an edge to the flow database.
func (p *Parser) addEdge(start, end *VertexInfo, edge *EdgeInfo) error {
	// Ensure both vertices exist
	p.addVertex(start.ID, start.Text, start.Shape)
	p.addVertex(end.ID, end.Text, end.Shape)
	// Create the edge
	flowEdge := &ast.FlowEdge{
		Start:           start.ID,
		End:             end.ID,
		Text:            edge.Text,
		LabelType:       "text",
		Classes:         make([]string, 0),
		IsUserDefinedID: false,
	}
	if edge.Type != "" {
		flowEdge.Type = &edge.Type
	}
	if edge.Stroke != "" {
		flowEdge.Stroke = &edge.Stroke
	}
	// Generate the edge ID
	edgeID := fmt.Sprintf("L-%s-%s-%d", start.ID, end.ID, len(p.flowDB.edges))
	flowEdge.ID = edgeID
	p.flowDB.edges = append(p.flowDB.edges, flowEdge)
	return nil
}
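
// For example, the first edge parsed from "A --> B" is stored with ID
// "L-A-B-0"; the numeric suffix is the edge's index in flowDB.edges.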

// buildFlowchart creates the final flowchart from the flowDB state.
func (p *Parser) buildFlowchart() *ast.Flowchart {
	flowchart := ast.NewFlowchart()
	flowchart.Direction = p.flowDB.direction
	flowchart.Vertices = p.flowDB.vertices
	flowchart.Edges = p.flowDB.edges
	flowchart.Classes = p.flowDB.classes
	flowchart.SubGraphs = p.flowDB.subGraphs
	flowchart.SubGraphLookup = p.flowDB.subGraphLookup
	flowchart.Tooltips = p.flowDB.tooltips
	flowchart.Version = p.flowDB.version
	return flowchart
}

// Helper methods

// check returns true if the current token matches the given type.
func (p *Parser) check(tokenType lexer.TokenType) bool {
	if p.isAtEnd() {
		return false
	}
	return p.peek().Type == tokenType
}

// checkNext returns true if the next token matches the given type.
func (p *Parser) checkNext(tokenType lexer.TokenType) bool {
	if p.current+1 >= len(p.tokens) {
		return false
	}
	return p.tokens[p.current+1].Type == tokenType
}

// checkDirection returns true if the current token is a direction.
func (p *Parser) checkDirection() bool {
	if p.isAtEnd() {
		return false
	}
	tokenType := p.peek().Type
	return tokenType == lexer.TokenTD || tokenType == lexer.TokenTB ||
		tokenType == lexer.TokenBT || tokenType == lexer.TokenRL ||
		tokenType == lexer.TokenLR
}

// checkShapeStart returns true if the current token starts a shape.
func (p *Parser) checkShapeStart() bool {
	if p.isAtEnd() {
		return false
	}
	tokenType := p.peek().Type
	return tokenType == lexer.TokenOpenBracket || tokenType == lexer.TokenOpenParen ||
		tokenType == lexer.TokenOpenBrace || tokenType == lexer.TokenOpenDoubleParen
}

// checkArrow returns true if the current token is an arrow.
func (p *Parser) checkArrow() bool {
	if p.isAtEnd() {
		return false
	}
	tokenType := p.peek().Type
	return tokenType == lexer.TokenArrowSolid || tokenType == lexer.TokenArrowDotted ||
		tokenType == lexer.TokenArrowThick || tokenType == lexer.TokenArrowOpen ||
		tokenType == lexer.TokenArrowPoint || tokenType == lexer.TokenArrowCross ||
		tokenType == lexer.TokenArrowCircle
}

// advance consumes and returns the current token.
func (p *Parser) advance() lexer.Token {
	if !p.isAtEnd() {
		p.current++
	}
	return p.previous()
}

// isAtEnd returns true if we've reached the end of the tokens.
func (p *Parser) isAtEnd() bool {
	return p.current >= len(p.tokens) || p.peek().Type == lexer.TokenEOF
}

// peek returns the current token without advancing.
func (p *Parser) peek() lexer.Token {
	if p.current >= len(p.tokens) {
		return lexer.Token{Type: lexer.TokenEOF}
	}
	return p.tokens[p.current]
}

// previous returns the previous token.
func (p *Parser) previous() lexer.Token {
	if p.current <= 0 {
		return lexer.Token{Type: lexer.TokenEOF}
	}
	return p.tokens[p.current-1]
}

// error creates a parsing error at the current token.
func (p *Parser) error(message string) error {
	token := p.peek()
	return fmt.Errorf("parse error at line %d, column %d: %s (got %s)",
		token.Line, token.Column, message, token.Type)
}

// skipToNextStatement skips tokens until the next statement.
func (p *Parser) skipToNextStatement() error {
	for !p.isAtEnd() && !p.check(lexer.TokenNewline) {
		p.advance()
	}
	if p.check(lexer.TokenNewline) {
		p.advance()
	}
	return nil
}