// Package parser provides syntax analysis for Mermaid diagrams. // Based on the grammar rules from flow.jison in mermaid.js package parser import ( "fmt" "strings" "mermaid-go/pkg/ast" "mermaid-go/pkg/lexer" ) // Parser implements recursive descent parsing for Mermaid flowcharts // Following the grammar structure from flow.jison type Parser struct { tokens []lexer.Token current int flowDB *FlowDB } // FlowDB manages the state during parsing, mirroring mermaid.js FlowDB type FlowDB struct { vertexCounter int vertices map[string]*ast.FlowVertex edges []*ast.FlowEdge classes map[string]*ast.FlowClass subGraphs []*ast.FlowSubGraph subGraphLookup map[string]*ast.FlowSubGraph tooltips map[string]string direction string version string defaultStyle []string defaultInterpolate string } // NewFlowDB creates a new flow database func NewFlowDB() *FlowDB { return &FlowDB{ vertices: make(map[string]*ast.FlowVertex), edges: make([]*ast.FlowEdge, 0), classes: make(map[string]*ast.FlowClass), subGraphs: make([]*ast.FlowSubGraph, 0), subGraphLookup: make(map[string]*ast.FlowSubGraph), tooltips: make(map[string]string), version: "gen-2", } } // NewParser creates a new parser func NewParser() *Parser { return &Parser{ flowDB: NewFlowDB(), } } // NewFlowchartParser creates a new flowchart parser (alias for NewParser) func NewFlowchartParser() *Parser { return NewParser() } // Parse parses the input string and returns a flowchart diagram func (p *Parser) Parse(input string) (ast.Diagram, error) { // Tokenize l := lexer.NewLexer(input) tokens, err := l.Tokenize() if err != nil { return nil, fmt.Errorf("lexical analysis failed: %w", err) } // Filter out whitespace and comments p.tokens = lexer.FilterTokens(tokens) p.current = 0 // Reset parser state p.flowDB = NewFlowDB() // Parse according to grammar err = p.parseDocument() if err != nil { return nil, fmt.Errorf("syntax analysis failed: %w", err) } // Build final flowchart return p.buildFlowchart(), nil } // parseDocument implements the top-level grammar rule // document: graphStatement | document graphStatement func (p *Parser) parseDocument() error { for !p.isAtEnd() { if err := p.parseStatement(); err != nil { return err } } return nil } // parseStatement parses individual statements func (p *Parser) parseStatement() error { if p.isAtEnd() { return nil } token := p.peek() switch token.Type { case lexer.TokenGraph: return p.parseGraphStatement() case lexer.TokenSubgraph: return p.parseSubgraphStatement() case lexer.TokenClass: return p.parseClassStatement() case lexer.TokenClassDef: return p.parseClassDefStatement() case lexer.TokenStyle: return p.parseStyleStatement() case lexer.TokenLinkStyle: return p.parseLinkStyleStatement() case lexer.TokenClick: return p.parseClickStatement() case lexer.TokenNewline: p.advance() // Skip newlines return nil case lexer.TokenEOF: return nil default: // Try to parse as edge statement return p.parseEdgeStatement() } } // parseGraphStatement: GRAPH dir? (NL graphStatementList)? func (p *Parser) parseGraphStatement() error { if !p.check(lexer.TokenGraph) { return p.error("expected 'graph'") } p.advance() // Optional direction if p.checkDirection() { dir := p.advance() p.flowDB.direction = dir.Value } // Optional newline if p.check(lexer.TokenNewline) { p.advance() } return nil } // parseSubgraphStatement handles subgraph definitions func (p *Parser) parseSubgraphStatement() error { if !p.check(lexer.TokenSubgraph) { return p.error("expected 'subgraph'") } p.advance() // Parse subgraph ID (optional) var subgraphID string var title string if p.check(lexer.TokenID) { subgraphID = p.advance().Value } else if p.check(lexer.TokenString) { // Quoted title becomes both ID and title titleToken := p.advance().Value title = titleToken[1 : len(titleToken)-1] // Remove quotes subgraphID = title } // Check for explicit title in square brackets if p.check(lexer.TokenOpenBracket) { p.advance() // consume [ titleParts := make([]string, 0) for !p.check(lexer.TokenCloseBracket) && !p.isAtEnd() { titleParts = append(titleParts, p.advance().Value) } if p.check(lexer.TokenCloseBracket) { p.advance() // consume ] title = strings.Join(titleParts, "") } } // Create subgraph subgraph := &ast.FlowSubGraph{ ID: subgraphID, Title: title, LabelType: "text", Classes: make([]string, 0), Nodes: make([]string, 0), } // Store current parsing state oldCurrent := p.current // Parse subgraph content until 'end' for !p.check(lexer.TokenEnd) && !p.isAtEnd() { if p.check(lexer.TokenNewline) { p.advance() continue } // Save state before parsing statement beforeStatement := p.current // Try to parse as edge statement (this will add vertices and edges) err := p.parseEdgeStatement() if err != nil { // If edge parsing failed, skip to next statement p.current = beforeStatement p.skipToNextStatement() continue } // Collect all vertices referenced in the statements parsed within this subgraph for i := oldCurrent; i < p.current; i++ { token := p.tokens[i] if token.Type == lexer.TokenID { // Check if this ID is a vertex if vertex, exists := p.flowDB.vertices[token.Value]; exists { // Add to subgraph nodes if not already present found := false for _, nodeID := range subgraph.Nodes { if nodeID == vertex.ID { found = true break } } if !found { subgraph.Nodes = append(subgraph.Nodes, vertex.ID) } } } } } if p.check(lexer.TokenEnd) { p.advance() } // Add subgraph to flowDB p.flowDB.subGraphs = append(p.flowDB.subGraphs, subgraph) p.flowDB.subGraphLookup[subgraphID] = subgraph return nil } // parseClassStatement handles class assignments func (p *Parser) parseClassStatement() error { if !p.check(lexer.TokenClass) { return p.error("expected 'class'") } p.advance() // Parse node list (comma separated) nodeIDs := make([]string, 0) for { if !p.check(lexer.TokenID) { break } nodeIDs = append(nodeIDs, p.advance().Value) if p.check(lexer.TokenComma) { p.advance() // consume comma } else { break } } // Parse class name if !p.check(lexer.TokenID) { return p.error("expected class name") } className := p.advance().Value // Apply class to nodes for _, nodeID := range nodeIDs { // Ensure vertex exists if _, exists := p.flowDB.vertices[nodeID]; !exists { p.addVertex(nodeID, "", "") } vertex := p.flowDB.vertices[nodeID] vertex.Classes = append(vertex.Classes, className) } return nil } // parseClassDefStatement handles class definitions func (p *Parser) parseClassDefStatement() error { if !p.check(lexer.TokenClassDef) { return p.error("expected 'classDef'") } p.advance() // Parse class name if !p.check(lexer.TokenID) { return p.error("expected class name") } className := p.advance().Value // Parse style definitions (everything until newline) styles := make([]string, 0) for !p.check(lexer.TokenNewline) && !p.isAtEnd() { token := p.advance() styles = append(styles, token.Value) } // Create class definition class := &ast.FlowClass{ ID: className, Styles: styles, TextStyles: make([]string, 0), } p.flowDB.classes[className] = class return nil } // parseStyleStatement handles style definitions func (p *Parser) parseStyleStatement() error { if !p.check(lexer.TokenStyle) { return p.error("expected 'style'") } p.advance() // Parse node ID if !p.check(lexer.TokenID) { return p.error("expected node ID") } nodeID := p.advance().Value // Parse style definitions (everything until newline) styles := make([]string, 0) for !p.check(lexer.TokenNewline) && !p.isAtEnd() { token := p.advance() styles = append(styles, token.Value) } // Ensure vertex exists if _, exists := p.flowDB.vertices[nodeID]; !exists { p.addVertex(nodeID, "", "") } // Apply styles to vertex vertex := p.flowDB.vertices[nodeID] vertex.Styles = append(vertex.Styles, styles...) return nil } // parseLinkStyleStatement handles link style definitions func (p *Parser) parseLinkStyleStatement() error { if !p.check(lexer.TokenLinkStyle) { return p.error("expected 'linkStyle'") } p.advance() // Skip implementation for now return p.skipToNextStatement() } // parseClickStatement handles click event definitions func (p *Parser) parseClickStatement() error { if !p.check(lexer.TokenClick) { return p.error("expected 'click'") } p.advance() // consume 'click' // Parse node ID if !p.check(lexer.TokenID) { return p.error("expected node ID after 'click'") } nodeID := p.advance().Value // Parse click action (callback or href) clickEvent := &ast.ClickEvent{ NodeID: nodeID, } if p.check(lexer.TokenID) || p.check(lexer.TokenString) { action := p.advance().Value // Remove quotes if it's a string if strings.HasPrefix(action, "\"") && strings.HasSuffix(action, "\"") { action = action[1 : len(action)-1] } // Check if it's a callback (function call) or URL if strings.Contains(action, "http") || strings.Contains(action, "www.") { clickEvent.Link = &action } else { clickEvent.Callback = &action } } // Parse optional target for links if p.check(lexer.TokenString) { target := p.advance().Value target = target[1 : len(target)-1] // Remove quotes clickEvent.Target = &target } // Apply click event to vertex if vertex, exists := p.flowDB.vertices[nodeID]; exists { vertex.OnClick = clickEvent } else { // Ensure vertex exists p.addVertex(nodeID, "", "") p.flowDB.vertices[nodeID].OnClick = clickEvent } return nil } // parseEdgeStatement parses edge definitions // This is the core parsing logic for flowchart connections func (p *Parser) parseEdgeStatement() error { // Parse start vertex startVertex, err := p.parseVertex() if err != nil { return err } // Parse edge edge, err := p.parseEdge() if err != nil { return err } // Parse end vertex endVertex, err := p.parseVertex() if err != nil { return err } // Create edge in flowDB return p.addEdge(startVertex, endVertex, edge) } // parseVertex parses vertex definitions with shapes // Examples: A[Text], B(Text), C{Text}, etc. func (p *Parser) parseVertex() (*VertexInfo, error) { if !p.check(lexer.TokenID) { return nil, p.error("expected vertex identifier") } id := p.advance().Value vertex := &VertexInfo{ID: id} // Check for shape definition if p.checkShapeStart() { shape, text, err := p.parseShape() if err != nil { return nil, err } vertex.Shape = shape vertex.Text = text // Add vertex to flowDB p.addVertex(id, text, shape) } return vertex, nil } // VertexInfo holds parsed vertex information type VertexInfo struct { ID string Text string Shape ast.FlowVertexTypeParam } // EdgeInfo holds parsed edge information type EdgeInfo struct { Type string Text string Length int Stroke ast.FlowEdgeStroke } // parseShape parses shape definitions [text], (text), {text}, etc. func (p *Parser) parseShape() (ast.FlowVertexTypeParam, string, error) { startToken := p.peek() var shape ast.FlowVertexTypeParam var endToken lexer.TokenType switch startToken.Type { case lexer.TokenOpenBracket: // Check for special bracket shapes if p.checkSequence([]lexer.TokenType{lexer.TokenSlash}) { shape = ast.VertexTypeLeanRight p.advance() // consume [ p.advance() // consume / endToken = lexer.TokenSlash } else if p.checkSequence([]lexer.TokenType{lexer.TokenBackslash}) { shape = ast.VertexTypeLeanLeft p.advance() // consume [ p.advance() // consume \ endToken = lexer.TokenBackslash } else { shape = ast.VertexTypeRect endToken = lexer.TokenCloseBracket p.advance() // consume [ } case lexer.TokenOpenParen: if p.checkNext(lexer.TokenOpenParen) { // ((text)) shape = ast.VertexTypeCircle p.advance() // skip first ( p.advance() // skip second ( endToken = lexer.TokenCloseParen } else { // (text) shape = ast.VertexTypeRound p.advance() // consume ( endToken = lexer.TokenCloseParen } case lexer.TokenOpenBrace: shape = ast.VertexTypeDiamond p.advance() // consume { endToken = lexer.TokenCloseBrace case lexer.TokenOpenDoubleParen: shape = ast.VertexTypeCircle p.advance() // consume (( endToken = lexer.TokenCloseDoubleParen case lexer.TokenCloseAngle: // Check for flag shape >text] shape = ast.VertexTypeFlag p.advance() // consume > endToken = lexer.TokenCloseBracket default: return "", "", p.error("expected shape delimiter") } // Parse text content text := "" for !p.check(endToken) && !p.isAtEnd() { if endToken == lexer.TokenSlash && p.check(lexer.TokenSlash) { break } if endToken == lexer.TokenBackslash && p.check(lexer.TokenBackslash) { break } if p.check(lexer.TokenString) { // Remove quotes from string val := p.advance().Value text = val[1 : len(val)-1] // Remove surrounding quotes } else { text += p.advance().Value } } // Consume closing delimiter(s) switch endToken { case lexer.TokenSlash: if !p.check(lexer.TokenSlash) { return "", "", p.error("expected closing /") } p.advance() // consume / if !p.check(lexer.TokenCloseBracket) { return "", "", p.error("expected closing ]") } p.advance() // consume ] case lexer.TokenBackslash: if !p.check(lexer.TokenBackslash) { return "", "", p.error("expected closing \\") } p.advance() // consume \ if !p.check(lexer.TokenCloseBracket) { return "", "", p.error("expected closing ]") } p.advance() // consume ] case lexer.TokenCloseParen: if !p.check(endToken) { return "", "", p.error("expected closing delimiter") } p.advance() // consume closing delimiter // Handle double paren closing if shape == ast.VertexTypeCircle && startToken.Type == lexer.TokenOpenParen { if !p.check(lexer.TokenCloseParen) { return "", "", p.error("expected second closing parenthesis") } p.advance() } default: if !p.check(endToken) { return "", "", p.error("expected closing delimiter") } p.advance() // consume closing delimiter } return shape, text, nil } // parseEdge parses edge definitions with arrows and labels func (p *Parser) parseEdge() (*EdgeInfo, error) { edge := &EdgeInfo{ Stroke: ast.StrokeNormal, Length: 1, } // Parse edge label if present (|text|) if p.check(lexer.TokenPipe) { p.advance() // consume | // Collect text until next | text := "" for !p.check(lexer.TokenPipe) && !p.isAtEnd() { text += p.advance().Value } if !p.check(lexer.TokenPipe) { return nil, p.error("expected closing pipe for edge label") } p.advance() // consume closing | edge.Text = text } // Parse arrow type if !p.checkArrow() { return nil, p.error("expected arrow") } arrow := p.advance() edge.Type, edge.Stroke = p.parseArrowType(arrow.Value) return edge, nil } // parseArrowType extracts type and stroke from arrow token func (p *Parser) parseArrowType(arrow string) (string, ast.FlowEdgeStroke) { switch arrow { case "-->": return "arrow_point", ast.StrokeNormal case "-.->": return "arrow_point", ast.StrokeDotted case "==>": return "arrow_point", ast.StrokeThick case "--x": return "arrow_cross", ast.StrokeNormal case "--o": return "arrow_circle", ast.StrokeNormal case "---": return "arrow_open", ast.StrokeNormal default: return "arrow_point", ast.StrokeNormal } } // FlowDB manipulation methods (mirroring mermaid.js FlowDB) // addVertex adds a vertex to the flow database func (p *Parser) addVertex(id, text string, vertexType ast.FlowVertexTypeParam) { vertex := p.flowDB.vertices[id] if vertex == nil { vertex = &ast.FlowVertex{ ID: id, LabelType: "text", DomID: fmt.Sprintf("flowchart-%s-%d", id, p.flowDB.vertexCounter), Styles: make([]string, 0), Classes: make([]string, 0), } p.flowDB.vertices[id] = vertex p.flowDB.vertexCounter++ } if text != "" { vertex.Text = &text } if vertexType != "" { vertex.Type = &vertexType } } // addEdge adds an edge to the flow database func (p *Parser) addEdge(start, end *VertexInfo, edge *EdgeInfo) error { // Ensure vertices exist p.addVertex(start.ID, start.Text, start.Shape) p.addVertex(end.ID, end.Text, end.Shape) // Create edge flowEdge := &ast.FlowEdge{ Start: start.ID, End: end.ID, Text: edge.Text, LabelType: "text", Classes: make([]string, 0), IsUserDefinedID: false, } if edge.Type != "" { flowEdge.Type = &edge.Type } if edge.Stroke != "" { flowEdge.Stroke = &edge.Stroke } // Generate edge ID edgeID := fmt.Sprintf("L-%s-%s-%d", start.ID, end.ID, len(p.flowDB.edges)) flowEdge.ID = edgeID p.flowDB.edges = append(p.flowDB.edges, flowEdge) return nil } // buildFlowchart creates the final flowchart from flowDB state func (p *Parser) buildFlowchart() *ast.Flowchart { flowchart := ast.NewFlowchart() flowchart.Direction = p.flowDB.direction flowchart.Vertices = p.flowDB.vertices flowchart.Edges = p.flowDB.edges flowchart.Classes = p.flowDB.classes flowchart.SubGraphs = p.flowDB.subGraphs flowchart.SubGraphLookup = p.flowDB.subGraphLookup flowchart.Tooltips = p.flowDB.tooltips flowchart.Version = p.flowDB.version return flowchart } // Helper methods // check returns true if current token matches the given type func (p *Parser) check(tokenType lexer.TokenType) bool { if p.isAtEnd() { return false } return p.peek().Type == tokenType } // checkNext returns true if next token matches the given type func (p *Parser) checkNext(tokenType lexer.TokenType) bool { if p.current+1 >= len(p.tokens) { return false } return p.tokens[p.current+1].Type == tokenType } // checkSequence checks if the current position plus offset matches a sequence of token types func (p *Parser) checkSequence(types []lexer.TokenType) bool { for i, tokenType := range types { if p.current+1+i >= len(p.tokens) { return false } if p.tokens[p.current+1+i].Type != tokenType { return false } } return true } // checkDirection returns true if current token is a direction func (p *Parser) checkDirection() bool { if p.isAtEnd() { return false } tokenType := p.peek().Type return tokenType == lexer.TokenTD || tokenType == lexer.TokenTB || tokenType == lexer.TokenBT || tokenType == lexer.TokenRL || tokenType == lexer.TokenLR } // checkShapeStart returns true if current token starts a shape func (p *Parser) checkShapeStart() bool { if p.isAtEnd() { return false } tokenType := p.peek().Type return tokenType == lexer.TokenOpenBracket || tokenType == lexer.TokenOpenParen || tokenType == lexer.TokenOpenBrace || tokenType == lexer.TokenOpenDoubleParen || tokenType == lexer.TokenCloseAngle // for flag shape >text] } // checkArrow returns true if current token is an arrow func (p *Parser) checkArrow() bool { if p.isAtEnd() { return false } tokenType := p.peek().Type return tokenType == lexer.TokenArrowSolid || tokenType == lexer.TokenArrowDotted || tokenType == lexer.TokenArrowThick || tokenType == lexer.TokenArrowOpen || tokenType == lexer.TokenArrowPoint || tokenType == lexer.TokenArrowCross || tokenType == lexer.TokenArrowCircle } // advance consumes and returns the current token func (p *Parser) advance() lexer.Token { if !p.isAtEnd() { p.current++ } return p.previous() } // isAtEnd returns true if we've reached the end of tokens func (p *Parser) isAtEnd() bool { return p.current >= len(p.tokens) || p.peek().Type == lexer.TokenEOF } // peek returns the current token without advancing func (p *Parser) peek() lexer.Token { if p.current >= len(p.tokens) { return lexer.Token{Type: lexer.TokenEOF} } return p.tokens[p.current] } // previous returns the previous token func (p *Parser) previous() lexer.Token { if p.current <= 0 { return lexer.Token{Type: lexer.TokenEOF} } return p.tokens[p.current-1] } // error creates a parsing error func (p *Parser) error(message string) error { token := p.peek() return fmt.Errorf("parse error at line %d, column %d: %s (got %s)", token.Line, token.Column, message, token.Type) } // skipToNextStatement skips tokens until next statement func (p *Parser) skipToNextStatement() error { for !p.isAtEnd() && !p.check(lexer.TokenNewline) { p.advance() } if p.check(lexer.TokenNewline) { p.advance() } return nil }