// Package parser provides state diagram parsing based on stateDiagram.jison package parser import ( "fmt" "strings" "mermaid-go/pkg/ast" "mermaid-go/pkg/lexer" ) // StateParser implements state diagram parsing following stateDiagram.jison type StateParser struct { tokens []lexer.Token current int diagram *ast.StateDiagram stateMap map[string]*ast.StateNode // Keep track of states by name for quick lookup } // NewStateParser creates a new state parser func NewStateParser() *StateParser { return &StateParser{ diagram: ast.NewStateDiagram(), stateMap: make(map[string]*ast.StateNode), } } // Parse parses state diagram syntax func (p *StateParser) Parse(input string) (*ast.StateDiagram, error) { // Tokenize l := lexer.NewLexer(input) tokens, err := l.Tokenize() if err != nil { return nil, fmt.Errorf("lexical analysis failed: %w", err) } // Filter tokens p.tokens = lexer.FilterTokens(tokens) p.current = 0 p.diagram = ast.NewStateDiagram() p.stateMap = make(map[string]*ast.StateNode) // Parse document err = p.parseDocument() if err != nil { return nil, fmt.Errorf("syntax analysis failed: %w", err) } return p.diagram, nil } // parseDocument parses the state diagram document func (p *StateParser) parseDocument() error { // Expect stateDiagram or stateDiagram-v2 if !p.check(lexer.TokenID) || (p.peek().Value != "stateDiagram" && p.peek().Value != "stateDiagram-v2") { return p.error("expected 'stateDiagram' or 'stateDiagram-v2'") } p.advance() // Parse statements for !p.isAtEnd() { if err := p.parseStatement(); err != nil { return err } } return nil } // parseStatement parses individual state diagram statements func (p *StateParser) parseStatement() error { if p.isAtEnd() { return nil } switch { case p.check(lexer.TokenNewline): p.advance() // Skip newlines return nil case p.checkKeyword("direction"): return p.parseDirection() case p.checkKeyword("note"): return p.parseNote() case p.checkKeyword("state"): return p.parseState() case p.check(lexer.TokenEntry): return p.parseStateAction("entry") case p.check(lexer.TokenExit): return p.parseStateAction("exit") case p.check(lexer.TokenDo): return p.parseStateAction("do") case p.check(lexer.TokenOpenBracket): // Handle [*] start/end states return p.parseStartEndState() case p.check(lexer.TokenID): // Try to parse as state or transition return p.parseStateOrTransition() default: token := p.peek() return p.error(fmt.Sprintf("unexpected token: %s", token.Value)) } } // parseState parses state declarations func (p *StateParser) parseState() error { p.advance() // consume 'state' if !p.check(lexer.TokenID) { return p.error("expected state name") } stateName := p.advance().Value // Check if state already exists var state *ast.StateNode if existingState, exists := p.stateMap[stateName]; exists { state = existingState } else { state = &ast.StateNode{ ID: stateName, Label: stateName, Type: ast.StateTypeDefault, SubStates: make(map[string]*ast.StateNode), CssClasses: make([]string, 0), } } // Check for 'as' alias if p.checkKeyword("as") { p.advance() // consume 'as' if !p.check(lexer.TokenID) && !p.check(lexer.TokenString) { return p.error("expected state label after 'as'") } label := p.advance().Value if strings.HasPrefix(label, "\"") && strings.HasSuffix(label, "\"") { label = label[1 : len(label)-1] // Remove quotes } state.Label = label } // Check for state body (composite state) if p.check(lexer.TokenOpenBrace) { p.advance() // consume '{' err := p.parseStateBody(state) if err != nil { return err } if !p.check(lexer.TokenCloseBrace) { return p.error("expected '}'") } p.advance() // consume '}' } // Check for special state types if p.check(lexer.TokenColon) { p.advance() // consume ':' if p.checkKeyword("<>") { p.advance() state.Type = ast.StateTypeFork } else if p.checkKeyword("<>") { p.advance() state.Type = ast.StateTypeJoin } else if p.checkKeyword("<>") { p.advance() state.Type = ast.StateTypeChoice } else if p.checkKeyword("<>") { p.advance() state.Type = ast.StateTypeHistory } else if p.checkKeyword("<>") { p.advance() state.Type = ast.StateTypeDeepHistory } else { // Parse description var descParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { descParts = append(descParts, p.advance().Value) } if len(descParts) > 0 { desc := strings.TrimSpace(strings.Join(descParts, " ")) state.Description = &desc } } } // Only add state if it doesn't already exist if _, exists := p.stateMap[stateName]; !exists { p.stateMap[stateName] = state p.diagram.States = append(p.diagram.States, state) } return nil } // parseStateBody parses the contents of a composite state func (p *StateParser) parseStateBody(parentState *ast.StateNode) error { for !p.check(lexer.TokenCloseBrace) && !p.isAtEnd() { if p.check(lexer.TokenNewline) { p.advance() continue } // Parse substates and transitions within the composite state if p.checkKeyword("state") { // Check if this is a nested composite state // Look ahead: state ID { means nested composite state if p.peekNext().Type == lexer.TokenID && p.current+2 < len(p.tokens) && p.tokens[p.current+2].Type == lexer.TokenOpenBrace { // This is a nested composite state, parse it as a full state if err := p.parseNestedCompositeState(parentState); err != nil { return err } } else { // This is a simple substate if err := p.parseSubState(parentState); err != nil { return err } } } else if p.check(lexer.TokenOpenBracket) { // Handle [*] start/end states within composite state if err := p.parseStartEndStateInComposite(parentState); err != nil { return err } } else if p.check(lexer.TokenID) { // In composite state, ID followed by --> is a transition // We need to parse the transition and ensure the states exist as substates if err := p.parseStateOrTransitionInComposite(parentState); err != nil { return err } } else { token := p.peek() return p.error(fmt.Sprintf("unexpected token in composite state: %s", token.Value)) } } return nil } // parseSubState parses a substate within a composite state func (p *StateParser) parseSubState(parentState *ast.StateNode) error { if !p.checkKeyword("state") { return p.error("expected 'state'") } p.advance() // consume 'state' if !p.check(lexer.TokenID) { return p.error("expected state name") } stateName := p.advance().Value // Create substate subState := &ast.StateNode{ ID: stateName, Label: stateName, Type: ast.StateTypeDefault, SubStates: make(map[string]*ast.StateNode), CssClasses: make([]string, 0), } // Add alias if different from ID if p.checkKeyword("as") { p.advance() // consume 'as' if !p.check(lexer.TokenID) && !p.check(lexer.TokenString) { return p.error("expected alias name") } alias := p.advance().Value // Remove quotes if present if len(alias) > 2 && alias[0] == '"' && alias[len(alias)-1] == '"' { alias = alias[1 : len(alias)-1] } subState.Label = alias } // Check for description or special type if p.check(lexer.TokenColon) { p.advance() // consume ':' if p.checkKeyword("<>") { p.advance() subState.Type = ast.StateTypeFork } else if p.checkKeyword("<>") { p.advance() subState.Type = ast.StateTypeJoin } else if p.checkKeyword("<>") { p.advance() subState.Type = ast.StateTypeChoice } else if p.checkKeyword("<>") { p.advance() subState.Type = ast.StateTypeHistory } else if p.checkKeyword("<>") { p.advance() subState.Type = ast.StateTypeDeepHistory } else { // Parse description var descParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { descParts = append(descParts, p.advance().Value) } if len(descParts) > 0 { desc := strings.TrimSpace(strings.Join(descParts, " ")) subState.Description = &desc } } } // Add to parent state's substates parentState.SubStates[subState.ID] = subState return nil } // parseNestedCompositeState parses a nested composite state within a parent composite state func (p *StateParser) parseNestedCompositeState(parentState *ast.StateNode) error { if !p.checkKeyword("state") { return p.error("expected 'state'") } p.advance() // consume 'state' if !p.check(lexer.TokenID) { return p.error("expected state name") } stateName := p.advance().Value // Create nested composite state nestedState := &ast.StateNode{ ID: stateName, Label: stateName, Type: ast.StateTypeDefault, SubStates: make(map[string]*ast.StateNode), CssClasses: make([]string, 0), } // Check for 'as' alias if p.checkKeyword("as") { p.advance() // consume 'as' if !p.check(lexer.TokenID) && !p.check(lexer.TokenString) { return p.error("expected alias name") } alias := p.advance().Value // Remove quotes if present if len(alias) > 2 && alias[0] == '"' && alias[len(alias)-1] == '"' { alias = alias[1 : len(alias)-1] } nestedState.Label = alias } // Parse the nested composite state body if p.check(lexer.TokenOpenBrace) { p.advance() // consume '{' err := p.parseStateBody(nestedState) if err != nil { return err } if !p.check(lexer.TokenCloseBrace) { return p.error("expected '}'") } p.advance() // consume '}' } // Add to parent state's substates parentState.SubStates[nestedState.ID] = nestedState return nil } // parseStateOrTransitionInComposite parses a state or transition within a composite state func (p *StateParser) parseStateOrTransitionInComposite(parentState *ast.StateNode) error { // This should be a transition within the composite state // Parse the transition normally, but ensure states are added as substates return p.parseStateOrTransitionWithParent(parentState) } // parseStateOrTransitionWithParent parses a state or transition with a parent composite state func (p *StateParser) parseStateOrTransitionWithParent(parentState *ast.StateNode) error { stateName := p.advance().Value // Ensure state exists as substate in parent p.ensureStateAsSubstate(parentState, stateName) // Check for transition arrow if p.checkArrow() { return p.parseTransitionWithParent(parentState, stateName) } // For now, just handle transitions in composite states // Other cases (state actions, descriptions) can be handled later return nil } // ensureStateAsSubstate ensures a state exists as a substate in the parent func (p *StateParser) ensureStateAsSubstate(parentState *ast.StateNode, id string) { if _, exists := parentState.SubStates[id]; !exists { subState := &ast.StateNode{ ID: id, Label: id, Type: ast.StateTypeDefault, SubStates: make(map[string]*ast.StateNode), CssClasses: make([]string, 0), } parentState.SubStates[id] = subState } } // parseTransitionWithParent parses a transition within a composite state func (p *StateParser) parseTransitionWithParent(parentState *ast.StateNode, fromState string) error { if !p.checkArrow() { return p.error("expected transition arrow") } p.advance() // consume arrow if p.isAtEnd() { return p.error("unexpected end of input") } var toState string if p.check(lexer.TokenOpenBracket) { // Handle [*] end state p.advance() // consume '[' if !p.check(lexer.TokenMult) { return p.error("expected '*' in [*]") } p.advance() // consume '*' if !p.check(lexer.TokenCloseBracket) { return p.error("expected ']' in [*]") } p.advance() // consume ']' toState = "[*]" } else if p.check(lexer.TokenID) { toState = p.advance().Value // Ensure toState exists as substate p.ensureStateAsSubstate(parentState, toState) } else { return p.error("expected state name or [*]") } // Create transition transition := &ast.StateTransition{ From: fromState, To: toState, } // Parse transition decorations if present if p.check(lexer.TokenColon) { p.advance() // consume ':' var labelParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { labelParts = append(labelParts, p.advance().Value) } if len(labelParts) > 0 { label := strings.TrimSpace(strings.Join(labelParts, " ")) transition.Label = &label } } // Add transition to diagram p.diagram.Transitions = append(p.diagram.Transitions, transition) return nil } // parseStartEndStateInComposite parses [*] transitions within a composite state func (p *StateParser) parseStartEndStateInComposite(parentState *ast.StateNode) error { if !p.check(lexer.TokenOpenBracket) { return p.error("expected '['") } p.advance() // consume '[' if !p.check(lexer.TokenMult) { return p.error("expected '*'") } p.advance() // consume '*' if !p.check(lexer.TokenCloseBracket) { return p.error("expected ']'") } p.advance() // consume ']' // Expect arrow if !p.checkArrow() { return p.error("expected transition arrow") } p.advance() // consume arrow // Parse target state if p.isAtEnd() { return p.error("unexpected end of input") } var toState string if p.check(lexer.TokenOpenBracket) { // Handle [*] end state p.advance() // consume '[' if !p.check(lexer.TokenMult) { return p.error("expected '*' in [*]") } p.advance() // consume '*' if !p.check(lexer.TokenCloseBracket) { return p.error("expected ']' in [*]") } p.advance() // consume ']' toState = "[*]" } else if p.check(lexer.TokenID) { toState = p.advance().Value // Ensure toState exists as substate p.ensureStateAsSubstate(parentState, toState) } else { return p.error("expected state name or [*]") } // Create transition transition := &ast.StateTransition{ From: "[*]", To: toState, } // Parse transition decorations if present if p.check(lexer.TokenColon) { p.advance() // consume ':' var labelParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { labelParts = append(labelParts, p.advance().Value) } if len(labelParts) > 0 { label := strings.TrimSpace(strings.Join(labelParts, " ")) transition.Label = &label } } // Add transition to diagram p.diagram.Transitions = append(p.diagram.Transitions, transition) return nil } // parseStartEndState parses [*] --> state or state --> [*] transitions func (p *StateParser) parseStartEndState() error { if !p.check(lexer.TokenOpenBracket) { return p.error("expected '['") } p.advance() // consume '[' if !p.check(lexer.TokenMult) { return p.error("expected '*'") } p.advance() // consume '*' if !p.check(lexer.TokenCloseBracket) { return p.error("expected ']'") } p.advance() // consume ']' // Parse arrow if !p.checkArrow() { return p.error("expected transition arrow") } p.parseArrow() if !p.check(lexer.TokenID) { return p.error("expected target state") } targetState := p.advance().Value // Ensure target state exists p.ensureState(targetState) // Create transition transition := &ast.StateTransition{ From: "[*]", To: targetState, } // Check for label if p.check(lexer.TokenColon) { p.advance() // consume ':' var labelParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { labelParts = append(labelParts, p.advance().Value) } if len(labelParts) > 0 { label := strings.TrimSpace(strings.Join(labelParts, " ")) transition.Label = &label } } p.diagram.Transitions = append(p.diagram.Transitions, transition) // Set start state if it's the first [*] transition if p.diagram.StartState == nil { start := "[*]" p.diagram.StartState = &start } return nil } // parseStateOrTransition parses either a state definition or transition func (p *StateParser) parseStateOrTransition() error { stateName := p.advance().Value // Ensure state exists p.ensureState(stateName) // Check for transition arrow if p.checkArrow() { return p.parseTransition(stateName) } // Check for colon (description, special type, or state action) if p.check(lexer.TokenColon) { p.advance() // consume ':' // Check if this is a state action (entry/exit/do) if p.check(lexer.TokenEntry) || p.check(lexer.TokenExit) || p.check(lexer.TokenDo) { var actionType string if p.check(lexer.TokenEntry) { actionType = "entry" } else if p.check(lexer.TokenExit) { actionType = "exit" } else if p.check(lexer.TokenDo) { actionType = "do" } p.advance() // consume action type // Parse action content (everything until newline) var actionParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { actionParts = append(actionParts, p.advance().Value) } actionContent := strings.TrimSpace(strings.Join(actionParts, " ")) state := p.stateMap[stateName] switch actionType { case "entry": state.EntryAction = &actionContent case "exit": state.ExitAction = &actionContent case "do": state.DoAction = &actionContent } } else { // Regular description or special type var descParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { descParts = append(descParts, p.advance().Value) } if len(descParts) > 0 { desc := strings.TrimSpace(strings.Join(descParts, " ")) state := p.stateMap[stateName] // Check if this is a special state type switch desc { case "<>": state.Type = ast.StateTypeFork case "<>": state.Type = ast.StateTypeJoin case "<>": state.Type = ast.StateTypeChoice case "<>": state.Type = ast.StateTypeHistory case "<>": state.Type = ast.StateTypeDeepHistory default: state.Description = &desc } } } } return nil } // parseTransition parses state transitions func (p *StateParser) parseTransition(fromState string) error { p.parseArrow() var toState string if p.check(lexer.TokenOpenBracket) { // Handle --> [*] end state p.advance() // consume '[' if !p.check(lexer.TokenMult) { return p.error("expected '*'") } p.advance() // consume '*' if !p.check(lexer.TokenCloseBracket) { return p.error("expected ']'") } p.advance() // consume ']' toState = "[*]" // Add to end states if not already there found := false for _, endState := range p.diagram.EndStates { if endState == "[*]" { found = true break } } if !found { p.diagram.EndStates = append(p.diagram.EndStates, "[*]") } } else if p.check(lexer.TokenID) { toState = p.advance().Value p.ensureState(toState) } else { return p.error("expected target state") } transition := &ast.StateTransition{ From: fromState, To: toState, } // Optional decorations: ':' label, '[guard]' and '/action' in any order after ':' if p.check(lexer.TokenColon) { p.advance() // consume ':' // Collect rest of the line var parts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { parts = append(parts, p.advance().Value) } raw := strings.TrimSpace(strings.Join(parts, " ")) // Extract [guard] guardStart := strings.Index(raw, "[") guardEnd := strings.Index(raw, "]") if guardStart >= 0 && guardEnd > guardStart { cond := strings.TrimSpace(raw[guardStart+1 : guardEnd]) if cond != "" { transition.Condition = &cond } // Remove guard from raw raw = strings.TrimSpace(raw[:guardStart] + raw[guardEnd+1:]) } // Extract '/action' if slash := strings.Index(raw, "/"); slash >= 0 { action := strings.TrimSpace(raw[slash+1:]) if action != "" { transition.Action = &action } raw = strings.TrimSpace(raw[:slash]) } if raw != "" { lbl := strings.TrimSpace(raw) transition.Label = &lbl } } p.diagram.Transitions = append(p.diagram.Transitions, transition) return nil } // parseArrow parses transition arrows func (p *StateParser) parseArrow() string { token := p.peek() if token.Value == "-->" { p.advance() return "-->" } else if token.Value == "--" && p.checkNext(lexer.TokenCloseAngle) { p.advance() // consume '--' p.advance() // consume '>' return "-->" } // Default p.advance() return "-->" } // parseDirection parses direction statements func (p *StateParser) parseDirection() error { p.advance() // consume 'direction' if !p.check(lexer.TokenID) { return p.error("expected direction value") } direction := p.advance().Value p.diagram.Direction = direction return nil } // parseNote parses note statements - placeholder func (p *StateParser) parseNote() error { p.advance() // consume 'note' // note left of : text // note right of : text // note over : text (treat as over) var place ast.NotePlace if p.checkKeyword("left") { p.advance() if !p.checkKeyword("of") { return p.error("expected 'of' after 'left'") } p.advance() place = ast.NotePlaceLeft } else if p.checkKeyword("right") { p.advance() if !p.checkKeyword("of") { return p.error("expected 'of' after 'right'") } p.advance() place = ast.NotePlaceRight } else if p.checkKeyword("over") { p.advance() place = ast.NotePlaceOver } else { return p.error("expected note placement (left of, right of, over)") } if !p.check(lexer.TokenID) { return p.error("expected state ID for note") } stateID := p.advance().Value if !p.check(lexer.TokenColon) { return p.error("expected ':' after state in note") } p.advance() // Collect text var txt []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { txt = append(txt, p.advance().Value) } noteText := strings.TrimSpace(strings.Join(txt, " ")) // Attach to state; ensure exists p.ensureState(stateID) if st, ok := p.stateMap[stateID]; ok { st.Note = &ast.StateNote{Position: place, Text: noteText} } return nil } // ensureState ensures a state exists, creating it if needed func (p *StateParser) ensureState(id string) { if _, exists := p.stateMap[id]; !exists { state := &ast.StateNode{ ID: id, Label: id, Type: ast.StateTypeDefault, SubStates: make(map[string]*ast.StateNode), CssClasses: make([]string, 0), } p.stateMap[id] = state p.diagram.States = append(p.diagram.States, state) } } // Helper methods func (p *StateParser) check(tokenType lexer.TokenType) bool { if p.isAtEnd() { return false } return p.peek().Type == tokenType } func (p *StateParser) checkNext(tokenType lexer.TokenType) bool { if p.current+1 >= len(p.tokens) { return false } return p.tokens[p.current+1].Type == tokenType } func (p *StateParser) checkKeyword(keyword string) bool { if p.isAtEnd() { return false } token := p.peek() return token.Type == lexer.TokenID && strings.ToLower(token.Value) == strings.ToLower(keyword) } func (p *StateParser) checkArrow() bool { token := p.peek() return token.Value == "-->" || token.Value == "--" } func (p *StateParser) advance() lexer.Token { if !p.isAtEnd() { p.current++ } return p.previous() } func (p *StateParser) isAtEnd() bool { return p.current >= len(p.tokens) || p.peek().Type == lexer.TokenEOF } func (p *StateParser) peek() lexer.Token { if p.current >= len(p.tokens) { return lexer.Token{Type: lexer.TokenEOF} } return p.tokens[p.current] } func (p *StateParser) peekNext() lexer.Token { if p.current+1 >= len(p.tokens) { return lexer.Token{Type: lexer.TokenEOF} } return p.tokens[p.current+1] } func (p *StateParser) previous() lexer.Token { if p.current <= 0 { return lexer.Token{Type: lexer.TokenEOF} } return p.tokens[p.current-1] } func (p *StateParser) error(message string) error { token := p.peek() return fmt.Errorf("parse error at line %d, column %d: %s (got %s)", token.Line, token.Column, message, token.Type.String()) } func (p *StateParser) skipToNextStatement() error { for !p.isAtEnd() && !p.check(lexer.TokenNewline) { p.advance() } if p.check(lexer.TokenNewline) { p.advance() } return nil } // parseStateAction parses state actions (entry, exit, do) func (p *StateParser) parseStateAction(actionType string) error { p.advance() // consume action type (entry/exit/do) // Expect colon if !p.check(lexer.TokenColon) { return p.error("expected ':' after " + actionType) } p.advance() // consume ':' // Parse state name if !p.check(lexer.TokenID) { return p.error("expected state name after " + actionType + " :") } stateName := p.advance().Value // Ensure state exists if _, exists := p.stateMap[stateName]; !exists { // Create state if it doesn't exist state := &ast.StateNode{ ID: stateName, Label: stateName, Type: ast.StateTypeDefault, } p.stateMap[stateName] = state p.diagram.States = append(p.diagram.States, state) } // Parse action content (everything after state name until newline) var actionParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { actionParts = append(actionParts, p.advance().Value) } actionContent := strings.TrimSpace(strings.Join(actionParts, " ")) // Set the appropriate action field state := p.stateMap[stateName] switch actionType { case "entry": state.EntryAction = &actionContent case "exit": state.ExitAction = &actionContent case "do": state.DoAction = &actionContent } return nil }