// Package parser provides ER diagram parsing based on erDiagram.jison package parser import ( "fmt" "strings" "mermaid-go/pkg/ast" "mermaid-go/pkg/lexer" ) // ERParser implements ER diagram parsing following erDiagram.jison type ERParser struct { tokens []lexer.Token current int diagram *ast.ERDiagram entityMap map[string]*ast.EREntity // Keep track of entities by name for quick lookup } // NewERParser creates a new ER parser func NewERParser() *ERParser { return &ERParser{ diagram: ast.NewERDiagram(), entityMap: make(map[string]*ast.EREntity), } } // Parse parses ER diagram syntax func (p *ERParser) Parse(input string) (*ast.ERDiagram, error) { // Tokenize l := lexer.NewLexer(input) tokens, err := l.Tokenize() if err != nil { return nil, fmt.Errorf("lexical analysis failed: %w", err) } // Filter tokens p.tokens = lexer.FilterTokens(tokens) p.current = 0 p.diagram = ast.NewERDiagram() // Parse document err = p.parseDocument() if err != nil { return nil, fmt.Errorf("syntax analysis failed: %w", err) } return p.diagram, nil } // parseDocument parses the ER diagram document func (p *ERParser) parseDocument() error { // Expect erDiagram if !p.check(lexer.TokenID) || p.peek().Value != "erDiagram" { return p.error("expected 'erDiagram'") } p.advance() // Parse statements for !p.isAtEnd() { if err := p.parseStatement(); err != nil { return err } } return nil } // parseStatement parses individual ER diagram statements func (p *ERParser) parseStatement() error { if p.isAtEnd() { return nil } switch { case p.check(lexer.TokenNewline): p.advance() // Skip newlines return nil case p.checkKeyword("direction"): return p.parseDirectionStatement() case p.check(lexer.TokenID): // Try to parse as entity or relationship return p.parseEntityOrRelationship() default: token := p.peek() return p.error(fmt.Sprintf("unexpected token: %s", token.Value)) } } // parseEntityOrRelationship attempts to parse either an entity definition or a relationship func (p *ERParser) parseEntityOrRelationship() error { entityName := p.advance().Value // Check if this is a relationship (has cardinality symbols) if p.checkCardinality() { return p.parseRelationship(entityName) } // Check if this is an entity with attributes (has {) if p.check(lexer.TokenOpenBrace) { return p.parseEntityWithAttributes(entityName) } // Simple entity without attributes p.addEntity(entityName) return nil } // parseRelationship parses a relationship between two entities func (p *ERParser) parseRelationship(fromEntity string) error { // Parse relationship type (already tokenized as compound token) relType, err := p.parseRelType() if err != nil { return err } // Parse second entity if !p.check(lexer.TokenID) { return p.error("expected second entity name") } toEntity := p.advance().Value // Ensure both entities exist p.addEntity(fromEntity) p.addEntity(toEntity) // Parse optional label var label *string if p.check(lexer.TokenColon) { p.advance() // consume ':' var labelParts []string for !p.check(lexer.TokenNewline) && !p.isAtEnd() { labelParts = append(labelParts, p.advance().Value) } labelStr := strings.TrimSpace(strings.Join(labelParts, " ")) label = &labelStr } // Create relationship relation := &ast.ERRelation{ From: fromEntity, To: toEntity, Type: relType, Label: label, } p.diagram.Relations = append(p.diagram.Relations, relation) return nil } // parseEntityWithAttributes parses an entity with attribute definitions func (p *ERParser) parseEntityWithAttributes(entityName string) error { p.advance() // consume '{' entity := p.addEntity(entityName) // Parse attributes for !p.check(lexer.TokenCloseBrace) && !p.isAtEnd() { if p.check(lexer.TokenNewline) { p.advance() continue } attribute, err := p.parseAttribute() if err != nil { return err } entity.Attributes = append(entity.Attributes, attribute) } if !p.check(lexer.TokenCloseBrace) { return p.error("expected '}' to close entity attributes") } p.advance() // consume '}' return nil } // parseAttribute parses an attribute definition func (p *ERParser) parseAttribute() (*ast.ERAttribute, error) { // Parse attribute type if !p.check(lexer.TokenID) { return nil, p.error("expected attribute type") } attrType := p.advance().Value // Parse attribute name if !p.check(lexer.TokenID) { return nil, p.error("expected attribute name") } attrName := p.advance().Value attribute := &ast.ERAttribute{ Type: attrType, Name: attrName, } // Parse optional key (PK, FK, UK) if p.check(lexer.TokenID) && p.isKeyWord() { keyStr := p.advance().Value key := ast.ERKeyType(keyStr) attribute.Key = &key } // Parse optional comment (quoted string) if p.check(lexer.TokenString) { comment := p.advance().Value // Remove quotes if strings.HasPrefix(comment, "\"") && strings.HasSuffix(comment, "\"") { comment = comment[1 : len(comment)-1] } attribute.Comment = &comment } return attribute, nil } // parseRelType parses relationship type symbols func (p *ERParser) parseRelType() (ast.ERRelationType, error) { if p.isAtEnd() { return "", p.error("expected relationship type") } token := p.peek() // Check for compound ER relationship tokens first switch token.Type { case lexer.TokenEROneToMany: p.advance() return ast.ERRelationOneToMany, nil case lexer.TokenEROneToManyAlt: p.advance() return ast.ERRelationOneToManyAlt, nil case lexer.TokenERManyToOne: p.advance() return ast.ERRelationManyToOne, nil case lexer.TokenEROneToOne: p.advance() return ast.ERRelationOneToOne, nil case lexer.TokenERManyToMany: p.advance() return ast.ERRelationManyToMany, nil case lexer.TokenERManyToManyAlt: p.advance() return ast.ERRelationManyToManyAlt, nil case lexer.TokenERZeroToOne: p.advance() return ast.ERRelationZeroToOne, nil } // Fall back to individual token parsing for patterns not covered by compound tokens // Look ahead to match relationship patterns if p.matchString("||--||") { p.advance() // consume '|' p.advance() // consume '|' p.advance() // consume '-' p.advance() // consume '-' p.advance() // consume '|' p.advance() // consume '|' return ast.ERRelationOneToOne, nil } if p.matchString("||--o{") { p.advance() // consume '|' p.advance() // consume '|' p.advance() // consume '-' p.advance() // consume '-' p.advance() // consume 'o' p.advance() // consume '{' return ast.ERRelationOneToMany, nil } if p.matchString("}o--||") { p.advance() // consume '}' p.advance() // consume 'o' p.advance() // consume '-' p.advance() // consume '-' p.advance() // consume '|' p.advance() // consume '|' return ast.ERRelationManyToOne, nil } if p.matchString("}o--o{") { p.advance() // consume '}' p.advance() // consume 'o' p.advance() // consume '-' p.advance() // consume '-' p.advance() // consume 'o' p.advance() // consume '{' return ast.ERRelationManyToMany, nil } if p.matchString("||--o|") { p.advance() // consume '|' p.advance() // consume '|' p.advance() // consume '-' p.advance() // consume '-' p.advance() // consume 'o' p.advance() // consume '|' return ast.ERRelationZeroToOne, nil } if p.matchString("}o..o{") { p.advance() // consume '}' p.advance() // consume 'o' p.advance() // consume '.' p.advance() // consume '.' p.advance() // consume 'o' p.advance() // consume '{' return ast.ERRelationManyToMany, nil } if p.matchString("||..||") { p.advance() // consume '|' p.advance() // consume '|' p.advance() // consume '.' p.advance() // consume '.' p.advance() // consume '|' p.advance() // consume '|' return ast.ERRelationOneToOne, nil } return "", p.error("unrecognized relationship pattern") } // Helper methods func (p *ERParser) addEntity(name string) *ast.EREntity { if entity, exists := p.entityMap[name]; exists { return entity } entity := &ast.EREntity{ ID: fmt.Sprintf("entity-%s-%d", name, len(p.diagram.Entities)), Name: name, Attributes: make([]*ast.ERAttribute, 0), CssClasses: []string{"default"}, } p.entityMap[name] = entity p.diagram.Entities = append(p.diagram.Entities, entity) return entity } func (p *ERParser) checkCardinality() bool { if p.isAtEnd() { return false } token := p.peek() // Check for compound ER relationship tokens switch token.Type { case lexer.TokenEROneToMany, lexer.TokenEROneToManyAlt, lexer.TokenERManyToOne, lexer.TokenEROneToOne, lexer.TokenERManyToMany, lexer.TokenERManyToManyAlt, lexer.TokenERZeroToOne: return true } // Fall back to string matching for patterns not covered by compound tokens return p.matchString("||--||") || p.matchString("||--o{") || p.matchString("}o--||") || p.matchString("}o--o{") || p.matchString("||--o|") || p.matchString("}o..o{") || p.matchString("||..||") } func (p *ERParser) isKeyWord() bool { if p.isAtEnd() { return false } token := p.peek() return token.Type == lexer.TokenID && (token.Value == "PK" || token.Value == "FK" || token.Value == "UK") } func (p *ERParser) matchString(s string) bool { if p.current+len(s)-1 >= len(p.tokens) { return false } var actual strings.Builder for i := 0; i < len(s); i++ { if p.current+i >= len(p.tokens) { return false } actual.WriteString(p.tokens[p.current+i].Value) } return actual.String() == s } func (p *ERParser) check(tokenType lexer.TokenType) bool { if p.isAtEnd() { return false } return p.peek().Type == tokenType } func (p *ERParser) checkKeyword(keyword string) bool { if p.isAtEnd() { return false } token := p.peek() return token.Type == lexer.TokenID && strings.EqualFold(token.Value, keyword) } func (p *ERParser) advance() lexer.Token { if !p.isAtEnd() { p.current++ } return p.previous() } func (p *ERParser) isAtEnd() bool { return p.current >= len(p.tokens) || p.peek().Type == lexer.TokenEOF } func (p *ERParser) peek() lexer.Token { if p.current >= len(p.tokens) { return lexer.Token{Type: lexer.TokenEOF} } return p.tokens[p.current] } func (p *ERParser) previous() lexer.Token { if p.current <= 0 { return lexer.Token{Type: lexer.TokenEOF} } return p.tokens[p.current-1] } func (p *ERParser) parseDirectionStatement() error { p.advance() // consume 'direction' if !p.check(lexer.TokenID) { return p.error("expected direction (TB, BT, RL, LR)") } // For now, we'll just consume the direction token // The existing ERDiagram struct doesn't have a Direction field p.advance() return nil } func (p *ERParser) error(message string) error { token := p.peek() return fmt.Errorf("parse error at line %d, column %d: %s (got %s)", token.Line, token.Column, message, token.Type.String()) }