er.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. // Package parser provides ER diagram parsing based on erDiagram.jison
  2. package parser
  3. import (
  4. "fmt"
  5. "strings"
  6. "mermaid-go/pkg/ast"
  7. "mermaid-go/pkg/lexer"
  8. )
  9. // ERParser implements ER diagram parsing following erDiagram.jison
  10. type ERParser struct {
  11. tokens []lexer.Token
  12. current int
  13. diagram *ast.ERDiagram
  14. entityMap map[string]*ast.EREntity // Keep track of entities by name for quick lookup
  15. }
  16. // NewERParser creates a new ER parser
  17. func NewERParser() *ERParser {
  18. return &ERParser{
  19. diagram: ast.NewERDiagram(),
  20. entityMap: make(map[string]*ast.EREntity),
  21. }
  22. }
  23. // Parse parses ER diagram syntax
  24. func (p *ERParser) Parse(input string) (*ast.ERDiagram, error) {
  25. // Tokenize
  26. l := lexer.NewLexer(input)
  27. tokens, err := l.Tokenize()
  28. if err != nil {
  29. return nil, fmt.Errorf("lexical analysis failed: %w", err)
  30. }
  31. // Filter tokens
  32. p.tokens = lexer.FilterTokens(tokens)
  33. p.current = 0
  34. p.diagram = ast.NewERDiagram()
  35. // Parse document
  36. err = p.parseDocument()
  37. if err != nil {
  38. return nil, fmt.Errorf("syntax analysis failed: %w", err)
  39. }
  40. return p.diagram, nil
  41. }
  42. // parseDocument parses the ER diagram document
  43. func (p *ERParser) parseDocument() error {
  44. // Expect erDiagram
  45. if !p.check(lexer.TokenID) || p.peek().Value != "erDiagram" {
  46. return p.error("expected 'erDiagram'")
  47. }
  48. p.advance()
  49. // Parse statements
  50. for !p.isAtEnd() {
  51. if err := p.parseStatement(); err != nil {
  52. return err
  53. }
  54. }
  55. return nil
  56. }
  57. // parseStatement parses individual ER diagram statements
  58. func (p *ERParser) parseStatement() error {
  59. if p.isAtEnd() {
  60. return nil
  61. }
  62. switch {
  63. case p.check(lexer.TokenNewline):
  64. p.advance() // Skip newlines
  65. return nil
  66. case p.checkKeyword("direction"):
  67. return p.parseDirectionStatement()
  68. case p.check(lexer.TokenID):
  69. // Try to parse as entity or relationship
  70. return p.parseEntityOrRelationship()
  71. default:
  72. token := p.peek()
  73. return p.error(fmt.Sprintf("unexpected token: %s", token.Value))
  74. }
  75. }
  76. // parseEntityOrRelationship attempts to parse either an entity definition or a relationship
  77. func (p *ERParser) parseEntityOrRelationship() error {
  78. entityName := p.advance().Value
  79. // Check if this is a relationship (has cardinality symbols)
  80. if p.checkCardinality() {
  81. return p.parseRelationship(entityName)
  82. }
  83. // Check if this is an entity with attributes (has {)
  84. if p.check(lexer.TokenOpenBrace) {
  85. return p.parseEntityWithAttributes(entityName)
  86. }
  87. // Simple entity without attributes
  88. p.addEntity(entityName)
  89. return nil
  90. }
  91. // parseRelationship parses a relationship between two entities
  92. func (p *ERParser) parseRelationship(fromEntity string) error {
  93. // Parse relationship type (already tokenized as compound token)
  94. relType, err := p.parseRelType()
  95. if err != nil {
  96. return err
  97. }
  98. // Parse second entity
  99. if !p.check(lexer.TokenID) {
  100. return p.error("expected second entity name")
  101. }
  102. toEntity := p.advance().Value
  103. // Ensure both entities exist
  104. p.addEntity(fromEntity)
  105. p.addEntity(toEntity)
  106. // Parse optional label
  107. var label *string
  108. if p.check(lexer.TokenColon) {
  109. p.advance() // consume ':'
  110. var labelParts []string
  111. for !p.check(lexer.TokenNewline) && !p.isAtEnd() {
  112. labelParts = append(labelParts, p.advance().Value)
  113. }
  114. labelStr := strings.TrimSpace(strings.Join(labelParts, " "))
  115. label = &labelStr
  116. }
  117. // Create relationship
  118. relation := &ast.ERRelation{
  119. From: fromEntity,
  120. To: toEntity,
  121. Type: relType,
  122. Label: label,
  123. }
  124. p.diagram.Relations = append(p.diagram.Relations, relation)
  125. return nil
  126. }
  127. // parseEntityWithAttributes parses an entity with attribute definitions
  128. func (p *ERParser) parseEntityWithAttributes(entityName string) error {
  129. p.advance() // consume '{'
  130. entity := p.addEntity(entityName)
  131. // Parse attributes
  132. for !p.check(lexer.TokenCloseBrace) && !p.isAtEnd() {
  133. if p.check(lexer.TokenNewline) {
  134. p.advance()
  135. continue
  136. }
  137. attribute, err := p.parseAttribute()
  138. if err != nil {
  139. return err
  140. }
  141. entity.Attributes = append(entity.Attributes, attribute)
  142. }
  143. if !p.check(lexer.TokenCloseBrace) {
  144. return p.error("expected '}' to close entity attributes")
  145. }
  146. p.advance() // consume '}'
  147. return nil
  148. }
  149. // parseAttribute parses an attribute definition
  150. func (p *ERParser) parseAttribute() (*ast.ERAttribute, error) {
  151. // Parse attribute type
  152. if !p.check(lexer.TokenID) {
  153. return nil, p.error("expected attribute type")
  154. }
  155. attrType := p.advance().Value
  156. // Parse attribute name
  157. if !p.check(lexer.TokenID) {
  158. return nil, p.error("expected attribute name")
  159. }
  160. attrName := p.advance().Value
  161. attribute := &ast.ERAttribute{
  162. Type: attrType,
  163. Name: attrName,
  164. }
  165. // Parse optional key (PK, FK, UK)
  166. if p.check(lexer.TokenID) && p.isKeyWord() {
  167. keyStr := p.advance().Value
  168. key := ast.ERKeyType(keyStr)
  169. attribute.Key = &key
  170. }
  171. // Parse optional comment (quoted string)
  172. if p.check(lexer.TokenString) {
  173. comment := p.advance().Value
  174. // Remove quotes
  175. if strings.HasPrefix(comment, "\"") && strings.HasSuffix(comment, "\"") {
  176. comment = comment[1 : len(comment)-1]
  177. }
  178. attribute.Comment = &comment
  179. }
  180. return attribute, nil
  181. }
  182. // parseRelType parses relationship type symbols
  183. func (p *ERParser) parseRelType() (ast.ERRelationType, error) {
  184. if p.isAtEnd() {
  185. return "", p.error("expected relationship type")
  186. }
  187. token := p.peek()
  188. // Check for compound ER relationship tokens first
  189. switch token.Type {
  190. case lexer.TokenEROneToMany:
  191. p.advance()
  192. return ast.ERRelationOneToMany, nil
  193. case lexer.TokenEROneToManyAlt:
  194. p.advance()
  195. return ast.ERRelationOneToManyAlt, nil
  196. case lexer.TokenERManyToOne:
  197. p.advance()
  198. return ast.ERRelationManyToOne, nil
  199. case lexer.TokenEROneToOne:
  200. p.advance()
  201. return ast.ERRelationOneToOne, nil
  202. case lexer.TokenERManyToMany:
  203. p.advance()
  204. return ast.ERRelationManyToMany, nil
  205. case lexer.TokenERManyToManyAlt:
  206. p.advance()
  207. return ast.ERRelationManyToManyAlt, nil
  208. case lexer.TokenERZeroToOne:
  209. p.advance()
  210. return ast.ERRelationZeroToOne, nil
  211. }
  212. // Fall back to individual token parsing for patterns not covered by compound tokens
  213. // Look ahead to match relationship patterns
  214. if p.matchString("||--||") {
  215. p.advance() // consume '|'
  216. p.advance() // consume '|'
  217. p.advance() // consume '-'
  218. p.advance() // consume '-'
  219. p.advance() // consume '|'
  220. p.advance() // consume '|'
  221. return ast.ERRelationOneToOne, nil
  222. }
  223. if p.matchString("||--o{") {
  224. p.advance() // consume '|'
  225. p.advance() // consume '|'
  226. p.advance() // consume '-'
  227. p.advance() // consume '-'
  228. p.advance() // consume 'o'
  229. p.advance() // consume '{'
  230. return ast.ERRelationOneToMany, nil
  231. }
  232. if p.matchString("}o--||") {
  233. p.advance() // consume '}'
  234. p.advance() // consume 'o'
  235. p.advance() // consume '-'
  236. p.advance() // consume '-'
  237. p.advance() // consume '|'
  238. p.advance() // consume '|'
  239. return ast.ERRelationManyToOne, nil
  240. }
  241. if p.matchString("}o--o{") {
  242. p.advance() // consume '}'
  243. p.advance() // consume 'o'
  244. p.advance() // consume '-'
  245. p.advance() // consume '-'
  246. p.advance() // consume 'o'
  247. p.advance() // consume '{'
  248. return ast.ERRelationManyToMany, nil
  249. }
  250. if p.matchString("||--o|") {
  251. p.advance() // consume '|'
  252. p.advance() // consume '|'
  253. p.advance() // consume '-'
  254. p.advance() // consume '-'
  255. p.advance() // consume 'o'
  256. p.advance() // consume '|'
  257. return ast.ERRelationZeroToOne, nil
  258. }
  259. if p.matchString("}o..o{") {
  260. p.advance() // consume '}'
  261. p.advance() // consume 'o'
  262. p.advance() // consume '.'
  263. p.advance() // consume '.'
  264. p.advance() // consume 'o'
  265. p.advance() // consume '{'
  266. return ast.ERRelationManyToMany, nil
  267. }
  268. if p.matchString("||..||") {
  269. p.advance() // consume '|'
  270. p.advance() // consume '|'
  271. p.advance() // consume '.'
  272. p.advance() // consume '.'
  273. p.advance() // consume '|'
  274. p.advance() // consume '|'
  275. return ast.ERRelationOneToOne, nil
  276. }
  277. return "", p.error("unrecognized relationship pattern")
  278. }
  279. // Helper methods
  280. func (p *ERParser) addEntity(name string) *ast.EREntity {
  281. if entity, exists := p.entityMap[name]; exists {
  282. return entity
  283. }
  284. entity := &ast.EREntity{
  285. ID: fmt.Sprintf("entity-%s-%d", name, len(p.diagram.Entities)),
  286. Name: name,
  287. Attributes: make([]*ast.ERAttribute, 0),
  288. CssClasses: []string{"default"},
  289. }
  290. p.entityMap[name] = entity
  291. p.diagram.Entities = append(p.diagram.Entities, entity)
  292. return entity
  293. }
  294. func (p *ERParser) checkCardinality() bool {
  295. if p.isAtEnd() {
  296. return false
  297. }
  298. token := p.peek()
  299. // Check for compound ER relationship tokens
  300. switch token.Type {
  301. case lexer.TokenEROneToMany, lexer.TokenEROneToManyAlt, lexer.TokenERManyToOne,
  302. lexer.TokenEROneToOne, lexer.TokenERManyToMany, lexer.TokenERManyToManyAlt, lexer.TokenERZeroToOne:
  303. return true
  304. }
  305. // Fall back to string matching for patterns not covered by compound tokens
  306. return p.matchString("||--||") || p.matchString("||--o{") || p.matchString("}o--||") ||
  307. p.matchString("}o--o{") || p.matchString("||--o|") || p.matchString("}o..o{") ||
  308. p.matchString("||..||")
  309. }
  310. func (p *ERParser) isKeyWord() bool {
  311. if p.isAtEnd() {
  312. return false
  313. }
  314. token := p.peek()
  315. return token.Type == lexer.TokenID && (token.Value == "PK" || token.Value == "FK" || token.Value == "UK")
  316. }
  317. func (p *ERParser) matchString(s string) bool {
  318. if p.current+len(s)-1 >= len(p.tokens) {
  319. return false
  320. }
  321. var actual strings.Builder
  322. for i := 0; i < len(s); i++ {
  323. if p.current+i >= len(p.tokens) {
  324. return false
  325. }
  326. actual.WriteString(p.tokens[p.current+i].Value)
  327. }
  328. return actual.String() == s
  329. }
  330. func (p *ERParser) check(tokenType lexer.TokenType) bool {
  331. if p.isAtEnd() {
  332. return false
  333. }
  334. return p.peek().Type == tokenType
  335. }
  336. func (p *ERParser) checkKeyword(keyword string) bool {
  337. if p.isAtEnd() {
  338. return false
  339. }
  340. token := p.peek()
  341. return token.Type == lexer.TokenID && strings.EqualFold(token.Value, keyword)
  342. }
  343. func (p *ERParser) advance() lexer.Token {
  344. if !p.isAtEnd() {
  345. p.current++
  346. }
  347. return p.previous()
  348. }
  349. func (p *ERParser) isAtEnd() bool {
  350. return p.current >= len(p.tokens) || p.peek().Type == lexer.TokenEOF
  351. }
  352. func (p *ERParser) peek() lexer.Token {
  353. if p.current >= len(p.tokens) {
  354. return lexer.Token{Type: lexer.TokenEOF}
  355. }
  356. return p.tokens[p.current]
  357. }
  358. func (p *ERParser) previous() lexer.Token {
  359. if p.current <= 0 {
  360. return lexer.Token{Type: lexer.TokenEOF}
  361. }
  362. return p.tokens[p.current-1]
  363. }
  364. func (p *ERParser) parseDirectionStatement() error {
  365. p.advance() // consume 'direction'
  366. if !p.check(lexer.TokenID) {
  367. return p.error("expected direction (TB, BT, RL, LR)")
  368. }
  369. // For now, we'll just consume the direction token
  370. // The existing ERDiagram struct doesn't have a Direction field
  371. p.advance()
  372. return nil
  373. }
  374. func (p *ERParser) error(message string) error {
  375. token := p.peek()
  376. return fmt.Errorf("parse error at line %d, column %d: %s (got %s)",
  377. token.Line, token.Column, message, token.Type.String())
  378. }