// Package parser provides ER diagram parsing based on erDiagram.jison.
package parser

import (
	"fmt"
	"strings"

	"mermaid-go/pkg/ast"
	"mermaid-go/pkg/lexer"
)

// ERParser implements ER diagram parsing following erDiagram.jison.
type ERParser struct {
	tokens    []lexer.Token
	current   int
	diagram   *ast.ERDiagram
	entityMap map[string]*ast.EREntity // entities indexed by name for quick lookup
}

// NewERParser creates a new ER parser.
func NewERParser() *ERParser {
	return &ERParser{
		diagram:   ast.NewERDiagram(),
		entityMap: make(map[string]*ast.EREntity),
	}
}

// Parse parses ER diagram syntax into an ER diagram AST.
func (p *ERParser) Parse(input string) (*ast.ERDiagram, error) {
	// Tokenize
	l := lexer.NewLexer(input)
	tokens, err := l.Tokenize()
	if err != nil {
		return nil, fmt.Errorf("lexical analysis failed: %w", err)
	}

	// Filter tokens and reset parser state so a parser instance can be reused
	p.tokens = lexer.FilterTokens(tokens)
	p.current = 0
	p.diagram = ast.NewERDiagram()
	p.entityMap = make(map[string]*ast.EREntity)

	// Parse document
	if err := p.parseDocument(); err != nil {
		return nil, fmt.Errorf("syntax analysis failed: %w", err)
	}
	return p.diagram, nil
}
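
// Illustrative usage sketch (not part of the parser itself): the input string
// below is standard Mermaid erDiagram syntax handled by the code above.
//
//	p := NewERParser()
//	diagram, err := p.Parse("erDiagram\n    CUSTOMER ||--o{ ORDER : places")
//	if err != nil {
//		// handle the parse error
//	}
//	// diagram.Entities holds CUSTOMER and ORDER;
//	// diagram.Relations holds one one-to-many relation labeled "places".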

// parseDocument parses the ER diagram document.
func (p *ERParser) parseDocument() error {
	// Expect the 'erDiagram' header
	if !p.check(lexer.TokenID) || p.peek().Value != "erDiagram" {
		return p.error("expected 'erDiagram'")
	}
	p.advance()

	// Parse statements until end of input
	for !p.isAtEnd() {
		if err := p.parseStatement(); err != nil {
			return err
		}
	}
	return nil
}

// parseStatement parses a single ER diagram statement.
func (p *ERParser) parseStatement() error {
	if p.isAtEnd() {
		return nil
	}

	switch {
	case p.check(lexer.TokenNewline):
		p.advance() // skip blank lines
		return nil
	case p.checkKeyword("direction"):
		return p.parseDirectionStatement()
	case p.check(lexer.TokenID):
		// Either an entity definition or a relationship
		return p.parseEntityOrRelationship()
	default:
		token := p.peek()
		return p.error(fmt.Sprintf("unexpected token: %s", token.Value))
	}
}

// parseEntityOrRelationship parses either an entity definition or a relationship,
// deciding based on the token that follows the leading entity name.
func (p *ERParser) parseEntityOrRelationship() error {
	entityName := p.advance().Value

	// A cardinality symbol means this is a relationship
	if p.checkCardinality() {
		return p.parseRelationship(entityName)
	}

	// An opening brace means this is an entity with an attribute block
	if p.check(lexer.TokenOpenBrace) {
		return p.parseEntityWithAttributes(entityName)
	}

	// Otherwise it is a simple entity without attributes
	p.addEntity(entityName)
	return nil
}
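
// The three statement shapes parseEntityOrRelationship distinguishes, shown in
// standard Mermaid erDiagram syntax (illustrative):
//
//	CUSTOMER                        // simple entity
//	CUSTOMER {                      // entity with attributes
//	    string name
//	}
//	CUSTOMER ||--o{ ORDER : places  // relationship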

// parseRelationship parses a relationship between two entities.
func (p *ERParser) parseRelationship(fromEntity string) error {
	// Parse the relationship type (a compound lexer token, or individual symbol tokens)
	relType, err := p.parseRelType()
	if err != nil {
		return err
	}

	// Parse the second entity
	if !p.check(lexer.TokenID) {
		return p.error("expected second entity name")
	}
	toEntity := p.advance().Value

	// Ensure both entities exist
	p.addEntity(fromEntity)
	p.addEntity(toEntity)

	// Parse the optional label after ':'
	var label *string
	if p.check(lexer.TokenColon) {
		p.advance() // consume ':'
		var labelParts []string
		for !p.check(lexer.TokenNewline) && !p.isAtEnd() {
			labelParts = append(labelParts, p.advance().Value)
		}
		labelStr := strings.TrimSpace(strings.Join(labelParts, " "))
		label = &labelStr
	}

	// Create the relationship
	relation := &ast.ERRelation{
		From:  fromEntity,
		To:    toEntity,
		Type:  relType,
		Label: label,
	}
	p.diagram.Relations = append(p.diagram.Relations, relation)
	return nil
}
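
// Example relationship statements handled above (standard Mermaid erDiagram syntax):
//
//	CUSTOMER ||--o{ ORDER : places
//	ORDER }o--|| CUSTOMER : "placed by"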

// parseEntityWithAttributes parses an entity followed by a braced attribute block.
func (p *ERParser) parseEntityWithAttributes(entityName string) error {
	p.advance() // consume '{'
	entity := p.addEntity(entityName)

	// Parse attributes until the closing brace
	for !p.check(lexer.TokenCloseBrace) && !p.isAtEnd() {
		if p.check(lexer.TokenNewline) {
			p.advance()
			continue
		}
		attribute, err := p.parseAttribute()
		if err != nil {
			return err
		}
		entity.Attributes = append(entity.Attributes, attribute)
	}

	if !p.check(lexer.TokenCloseBrace) {
		return p.error("expected '}' to close entity attributes")
	}
	p.advance() // consume '}'
	return nil
}
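
// Example entity block in Mermaid erDiagram syntax (illustrative):
//
//	CUSTOMER {
//	    int id PK
//	    string name
//	}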

// parseAttribute parses a single attribute definition: type, name, optional key, optional comment.
func (p *ERParser) parseAttribute() (*ast.ERAttribute, error) {
	// Parse the attribute type
	if !p.check(lexer.TokenID) {
		return nil, p.error("expected attribute type")
	}
	attrType := p.advance().Value

	// Parse the attribute name
	if !p.check(lexer.TokenID) {
		return nil, p.error("expected attribute name")
	}
	attrName := p.advance().Value

	attribute := &ast.ERAttribute{
		Type: attrType,
		Name: attrName,
	}

	// Parse the optional key constraint (PK, FK, UK)
	if p.isKeyWord() {
		keyStr := p.advance().Value
		key := ast.ERKeyType(keyStr)
		attribute.Key = &key
	}

	// Parse the optional comment (a quoted string) and strip the surrounding quotes
	if p.check(lexer.TokenString) {
		comment := p.advance().Value
		if len(comment) >= 2 && strings.HasPrefix(comment, "\"") && strings.HasSuffix(comment, "\"") {
			comment = comment[1 : len(comment)-1]
		}
		attribute.Comment = &comment
	}
	return attribute, nil
}
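
// Example attribute lines accepted by parseAttribute (illustrative), showing the
// optional key constraint and quoted comment:
//
//	string name
//	string email UK
//	int customerId PK "unique identifier"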

// parseRelType parses the relationship cardinality symbols and maps them to a relation type.
func (p *ERParser) parseRelType() (ast.ERRelationType, error) {
	if p.isAtEnd() {
		return "", p.error("expected relationship type")
	}
	token := p.peek()

	// Check for compound ER relationship tokens first
	switch token.Type {
	case lexer.TokenEROneToMany:
		p.advance()
		return ast.ERRelationOneToMany, nil
	case lexer.TokenEROneToManyAlt:
		p.advance()
		return ast.ERRelationOneToManyAlt, nil
	case lexer.TokenERManyToOne:
		p.advance()
		return ast.ERRelationManyToOne, nil
	case lexer.TokenEROneToOne:
		p.advance()
		return ast.ERRelationOneToOne, nil
	case lexer.TokenERManyToMany:
		p.advance()
		return ast.ERRelationManyToMany, nil
	case lexer.TokenERManyToManyAlt:
		p.advance()
		return ast.ERRelationManyToManyAlt, nil
	case lexer.TokenERZeroToOne:
		p.advance()
		return ast.ERRelationZeroToOne, nil
	}

	// Fall back to matching individual single-character tokens for patterns
	// not covered by compound tokens.
	fallbacks := []struct {
		pattern string
		relType ast.ERRelationType
	}{
		{"||--||", ast.ERRelationOneToOne},
		{"||--o{", ast.ERRelationOneToMany},
		{"}o--||", ast.ERRelationManyToOne},
		{"}o--o{", ast.ERRelationManyToMany},
		{"||--o|", ast.ERRelationZeroToOne},
		{"}o..o{", ast.ERRelationManyToMany},
		{"||..||", ast.ERRelationOneToOne},
	}
	for _, fb := range fallbacks {
		if p.matchString(fb.pattern) {
			// Consume one token per pattern character
			for i := 0; i < len(fb.pattern); i++ {
				p.advance()
			}
			return fb.relType, nil
		}
	}

	return "", p.error("unrecognized relationship pattern")
}

// Helper methods

// addEntity returns the entity with the given name, creating and registering it if needed.
func (p *ERParser) addEntity(name string) *ast.EREntity {
	if entity, exists := p.entityMap[name]; exists {
		return entity
	}
	entity := &ast.EREntity{
		ID:         fmt.Sprintf("entity-%s-%d", name, len(p.diagram.Entities)),
		Name:       name,
		Attributes: make([]*ast.ERAttribute, 0),
		CssClasses: []string{"default"},
	}
	p.entityMap[name] = entity
	p.diagram.Entities = append(p.diagram.Entities, entity)
	return entity
}

// checkCardinality reports whether the upcoming tokens start a relationship cardinality.
func (p *ERParser) checkCardinality() bool {
	if p.isAtEnd() {
		return false
	}

	// Check for compound ER relationship tokens
	switch p.peek().Type {
	case lexer.TokenEROneToMany, lexer.TokenEROneToManyAlt, lexer.TokenERManyToOne,
		lexer.TokenEROneToOne, lexer.TokenERManyToMany, lexer.TokenERManyToManyAlt, lexer.TokenERZeroToOne:
		return true
	}

	// Fall back to string matching for patterns not covered by compound tokens
	return p.matchString("||--||") || p.matchString("||--o{") || p.matchString("}o--||") ||
		p.matchString("}o--o{") || p.matchString("||--o|") || p.matchString("}o..o{") ||
		p.matchString("||..||")
}

// isKeyWord reports whether the current token is a key constraint (PK, FK, or UK).
func (p *ERParser) isKeyWord() bool {
	if p.isAtEnd() {
		return false
	}
	token := p.peek()
	return token.Type == lexer.TokenID && (token.Value == "PK" || token.Value == "FK" || token.Value == "UK")
}

// matchString reports whether the next len(s) token values, concatenated, equal s.
// It assumes each cardinality symbol is lexed as a single-character token.
func (p *ERParser) matchString(s string) bool {
	if p.current+len(s)-1 >= len(p.tokens) {
		return false
	}
	var actual strings.Builder
	for i := 0; i < len(s); i++ {
		actual.WriteString(p.tokens[p.current+i].Value)
	}
	return actual.String() == s
}

// check reports whether the current token has the given type.
func (p *ERParser) check(tokenType lexer.TokenType) bool {
	if p.isAtEnd() {
		return false
	}
	return p.peek().Type == tokenType
}

// checkKeyword reports whether the current token is an identifier matching keyword (case-insensitive).
func (p *ERParser) checkKeyword(keyword string) bool {
	if p.isAtEnd() {
		return false
	}
	token := p.peek()
	return token.Type == lexer.TokenID && strings.EqualFold(token.Value, keyword)
}

// advance consumes the current token and returns it.
func (p *ERParser) advance() lexer.Token {
	if !p.isAtEnd() {
		p.current++
	}
	return p.previous()
}

// isAtEnd reports whether all tokens have been consumed.
func (p *ERParser) isAtEnd() bool {
	return p.current >= len(p.tokens) || p.peek().Type == lexer.TokenEOF
}

// peek returns the current token without consuming it, or an EOF token past the end.
func (p *ERParser) peek() lexer.Token {
	if p.current >= len(p.tokens) {
		return lexer.Token{Type: lexer.TokenEOF}
	}
	return p.tokens[p.current]
}

// previous returns the most recently consumed token, or an EOF token if nothing has been consumed.
func (p *ERParser) previous() lexer.Token {
	if p.current <= 0 {
		return lexer.Token{Type: lexer.TokenEOF}
	}
	return p.tokens[p.current-1]
}

// parseDirectionStatement parses a 'direction' statement (TB, BT, RL, or LR).
func (p *ERParser) parseDirectionStatement() error {
	p.advance() // consume 'direction'
	if !p.check(lexer.TokenID) {
		return p.error("expected direction (TB, BT, RL, LR)")
	}
	// Consume the direction token without storing it:
	// the current ERDiagram struct has no Direction field.
	p.advance()
	return nil
}
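
// Example direction statement (illustrative; the value is consumed but not yet stored):
//
//	direction LR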

// error builds a parse error annotated with the current token's position.
func (p *ERParser) error(message string) error {
	token := p.peek()
	return fmt.Errorf("parse error at line %d, column %d: %s (got %s)",
		token.Line, token.Column, message, token.Type.String())
}