sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Arguments alternate key, value, key, value, ...
    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be folded into LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be folded into Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


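# Illustrative note (not part of the original module): these builders receive the raw
# argument list of a SQL function call and normalize it into a single AST node. For
# example, build_logarithm honors the dialect's LOG_BASE_FIRST setting, so in the
# default dialect LOG(10, x) becomes exp.Log(this=10, expression=x), while a dialect
# with LOG_BASE_FIRST = False would swap the two operands.

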
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

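    # Illustrative note (not in the original source): dialects customize parsing by
    # subclassing Parser and overriding these class-level tables. A hypothetical sketch:
    #
    #   class MyDialectParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #       }
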
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

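    # Illustrative note (not in the original source): these token -> expression-class
    # tables drive precedence climbing. Each level of the expression grammar consumes
    # the operators listed for it and recurses into the next-tighter level, so that
    # "a OR b AND c" parses as Or(this=a, expression=And(this=b, expression=c)).
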
    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

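    # Illustrative example (not in the original source): COLUMN_OPERATORS is what turns
    # arrow operators into JSON nodes; e.g. parsing "data -> 'a'" with read="postgres"
    # produces an exp.JSONExtract whose path comes from dialect.to_json_path, and ->>
    # produces the scalar variant.
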
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

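    # Illustrative note (not in the original source): _parse_statement (defined below)
    # dispatches on the first token of a statement through STATEMENT_PARSERS, so e.g. a
    # leading USE token produces exp.Use(kind=<ROLE|WAREHOUSE|DATABASE|SCHEMA>, this=<table>).
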
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

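    # Illustrative note (not in the original source): FUNCTION_PARSERS overrides the
    # generic function-call parser for names with irregular syntax; e.g. TRY_CAST and
    # SAFE_CAST call _parse_cast(False, safe=True), yielding a non-strict exp.TryCast
    # rather than an exp.Cast.
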
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

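    # Illustrative note (not in the original source): OPTIONS_TYPE maps a leading
    # keyword to the word sequences that may follow it, so TRANSACTION_CHARACTERISTICS
    # accepts "ISOLATION LEVEL READ COMMITTED" and "READ ONLY" but not "ISOLATION ONLY".
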
    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

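    # Illustrative note (not in the original source): dialect parsers flip these boolean
    # knobs rather than re-implementing parsing logic; e.g. a dialect that accepts
    # SELECT COUNT(*) 'count' would set STRING_ALIASES = True in its Parser subclass.
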
    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

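    # Illustrative usage (not in the original source): Parser is normally driven via a
    # Dialect, but it can be exercised directly with a token stream, e.g.:
    #
    #   from sqlglot import Parser, Tokenizer
    #   Parser().parse(Tokenizer().tokenize("SELECT 1"))
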
1292 """ 1293 errors = [] 1294 for expression_type in ensure_list(expression_types): 1295 parser = self.EXPRESSION_PARSERS.get(expression_type) 1296 if not parser: 1297 raise TypeError(f"No parser registered for {expression_type}") 1298 1299 try: 1300 return self._parse(parser, raw_tokens, sql) 1301 except ParseError as e: 1302 e.errors[0]["into_expression"] = expression_type 1303 errors.append(e) 1304 1305 raise ParseError( 1306 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1307 errors=merge_errors(errors), 1308 ) from errors[-1] 1309 1310 def _parse( 1311 self, 1312 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1313 raw_tokens: t.List[Token], 1314 sql: t.Optional[str] = None, 1315 ) -> t.List[t.Optional[exp.Expression]]: 1316 self.reset() 1317 self.sql = sql or "" 1318 1319 total = len(raw_tokens) 1320 chunks: t.List[t.List[Token]] = [[]] 1321 1322 for i, token in enumerate(raw_tokens): 1323 if token.token_type == TokenType.SEMICOLON: 1324 if token.comments: 1325 chunks.append([token]) 1326 1327 if i < total - 1: 1328 chunks.append([]) 1329 else: 1330 chunks[-1].append(token) 1331 1332 expressions = [] 1333 1334 for tokens in chunks: 1335 self._index = -1 1336 self._tokens = tokens 1337 self._advance() 1338 1339 expressions.append(parse_method(self)) 1340 1341 if self._index < len(self._tokens): 1342 self.raise_error("Invalid expression / Unexpected token") 1343 1344 self.check_errors() 1345 1346 return expressions 1347 1348 def check_errors(self) -> None: 1349 """Logs or raises any found errors, depending on the chosen error level setting.""" 1350 if self.error_level == ErrorLevel.WARN: 1351 for error in self.errors: 1352 logger.error(str(error)) 1353 elif self.error_level == ErrorLevel.RAISE and self.errors: 1354 raise ParseError( 1355 concat_messages(self.errors, self.max_errors), 1356 errors=merge_errors(self.errors), 1357 ) 1358 1359 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1360 """ 1361 Appends an error in the list of recorded errors or raises it, depending on the chosen 1362 error level setting. 1363 """ 1364 token = token or self._curr or self._prev or Token.string("") 1365 start = token.start 1366 end = token.end + 1 1367 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1368 highlight = self.sql[start:end] 1369 end_context = self.sql[end : end + self.error_message_context] 1370 1371 error = ParseError.new( 1372 f"{message}. Line {token.line}, Col: {token.col}.\n" 1373 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1374 description=message, 1375 line=token.line, 1376 col=token.col, 1377 start_context=start_context, 1378 highlight=highlight, 1379 end_context=end_context, 1380 ) 1381 1382 if self.error_level == ErrorLevel.IMMEDIATE: 1383 raise error 1384 1385 self.errors.append(error) 1386 1387 def expression( 1388 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1389 ) -> E: 1390 """ 1391 Creates a new, validated Expression. 1392 1393 Args: 1394 exp_class: The expression class to instantiate. 1395 comments: An optional list of comments to attach to the expression. 1396 kwargs: The arguments to set for the expression along with their respective values. 1397 1398 Returns: 1399 The target expression. 
1400 """ 1401 instance = exp_class(**kwargs) 1402 instance.add_comments(comments) if comments else self._add_comments(instance) 1403 return self.validate_expression(instance) 1404 1405 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1406 if expression and self._prev_comments: 1407 expression.add_comments(self._prev_comments) 1408 self._prev_comments = None 1409 1410 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1411 """ 1412 Validates an Expression, making sure that all its mandatory arguments are set. 1413 1414 Args: 1415 expression: The expression to validate. 1416 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1417 1418 Returns: 1419 The validated expression. 1420 """ 1421 if self.error_level != ErrorLevel.IGNORE: 1422 for error_message in expression.error_messages(args): 1423 self.raise_error(error_message) 1424 1425 return expression 1426 1427 def _find_sql(self, start: Token, end: Token) -> str: 1428 return self.sql[start.start : end.end + 1] 1429 1430 def _is_connected(self) -> bool: 1431 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1432 1433 def _advance(self, times: int = 1) -> None: 1434 self._index += times 1435 self._curr = seq_get(self._tokens, self._index) 1436 self._next = seq_get(self._tokens, self._index + 1) 1437 1438 if self._index > 0: 1439 self._prev = self._tokens[self._index - 1] 1440 self._prev_comments = self._prev.comments 1441 else: 1442 self._prev = None 1443 self._prev_comments = None 1444 1445 def _retreat(self, index: int) -> None: 1446 if index != self._index: 1447 self._advance(index - self._index) 1448 1449 def _warn_unsupported(self) -> None: 1450 if len(self._tokens) <= 1: 1451 return 1452 1453 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1454 # interested in emitting a warning for the one being currently processed. 1455 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1456 1457 logger.warning( 1458 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1459 ) 1460 1461 def _parse_command(self) -> exp.Command: 1462 self._warn_unsupported() 1463 return self.expression( 1464 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1465 ) 1466 1467 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1468 """ 1469 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

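    # Illustrative note (not in the original source): statements whose first keyword
    # matches but whose tail is unsupported fall back to _parse_as_command, which wraps
    # the raw SQL in exp.Command instead of failing, e.g. an unrecognized DROP variant.
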
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1682 expression = self._parse_string() 1683 extend_props(self._parse_properties()) 1684 else: 1685 expression = self._parse_statement() 1686 1687 end = self._match_text_seq("END") 1688 1689 if return_: 1690 expression = self.expression(exp.Return, this=expression) 1691 elif create_token.token_type == TokenType.INDEX: 1692 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1693 if not self._match(TokenType.ON): 1694 index = self._parse_id_var() 1695 anonymous = False 1696 else: 1697 index = None 1698 anonymous = True 1699 1700 this = self._parse_index(index=index, anonymous=anonymous) 1701 elif create_token.token_type in self.DB_CREATABLES: 1702 table_parts = self._parse_table_parts( 1703 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1704 ) 1705 1706 # exp.Properties.Location.POST_NAME 1707 self._match(TokenType.COMMA) 1708 extend_props(self._parse_properties(before=True)) 1709 1710 this = self._parse_schema(this=table_parts) 1711 1712 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1713 extend_props(self._parse_properties()) 1714 1715 self._match(TokenType.ALIAS) 1716 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1717 # exp.Properties.Location.POST_ALIAS 1718 extend_props(self._parse_properties()) 1719 1720 if create_token.token_type == TokenType.SEQUENCE: 1721 expression = self._parse_types() 1722 extend_props(self._parse_properties()) 1723 else: 1724 expression = self._parse_ddl_select() 1725 1726 if create_token.token_type == TokenType.TABLE: 1727 # exp.Properties.Location.POST_EXPRESSION 1728 extend_props(self._parse_properties()) 1729 1730 indexes = [] 1731 while True: 1732 index = self._parse_index() 1733 1734 # exp.Properties.Location.POST_INDEX 1735 extend_props(self._parse_properties()) 1736 1737 if not index: 1738 break 1739 else: 1740 self._match(TokenType.COMMA) 1741 indexes.append(index) 1742 elif create_token.token_type == TokenType.VIEW: 1743 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1744 no_schema_binding = True 1745 1746 shallow = self._match_text_seq("SHALLOW") 1747 1748 if self._match_texts(self.CLONE_KEYWORDS): 1749 copy = self._prev.text.lower() == "copy" 1750 clone = self.expression( 1751 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1752 ) 1753 1754 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1755 return self._parse_as_command(start) 1756 1757 return self.expression( 1758 exp.Create, 1759 comments=comments, 1760 this=this, 1761 kind=create_token.text.upper(), 1762 replace=replace, 1763 unique=unique, 1764 expression=expression, 1765 exists=exists, 1766 properties=properties, 1767 indexes=indexes, 1768 no_schema_binding=no_schema_binding, 1769 begin=begin, 1770 end=end, 1771 clone=clone, 1772 ) 1773 1774 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1775 seq = exp.SequenceProperties() 1776 1777 options = [] 1778 index = self._index 1779 1780 while self._curr: 1781 self._match(TokenType.COMMA) 1782 if self._match_text_seq("INCREMENT"): 1783 self._match_text_seq("BY") 1784 self._match_text_seq("=") 1785 seq.set("increment", self._parse_term()) 1786 elif self._match_text_seq("MINVALUE"): 1787 seq.set("minvalue", self._parse_term()) 1788 elif self._match_text_seq("MAXVALUE"): 1789 seq.set("maxvalue", self._parse_term()) 1790 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1791 self._match_text_seq("=") 1792 seq.set("start", self._parse_term()) 1793 elif self._match_text_seq("CACHE"): 1794 # T-SQL allows empty CACHE which is initialized dynamically 1795 seq.set("cache", self._parse_number() or True) 1796 elif self._match_text_seq("OWNED", "BY"): 1797 # "OWNED BY NONE" is the default 1798 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1799 else: 1800 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1801 if opt: 1802 options.append(opt) 1803 else: 1804 break 1805 1806 seq.set("options", options if options else None) 1807 return None if self._index == index else seq 1808 1809 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1810 # only used for teradata currently 1811 self._match(TokenType.COMMA) 1812 1813 kwargs = { 1814 "no": self._match_text_seq("NO"), 1815 "dual": self._match_text_seq("DUAL"), 1816 "before": self._match_text_seq("BEFORE"), 1817 "default": self._match_text_seq("DEFAULT"), 1818 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1819 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1820 "after": self._match_text_seq("AFTER"), 1821 "minimum": self._match_texts(("MIN", "MINIMUM")), 1822 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1823 } 1824 1825 if self._match_texts(self.PROPERTY_PARSERS): 1826 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1827 try: 1828 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1829 except TypeError: 1830 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1831 1832 return None 1833 1834 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1835 return self._parse_wrapped_csv(self._parse_property) 1836 1837 def _parse_property(self) -> t.Optional[exp.Expression]: 1838 if self._match_texts(self.PROPERTY_PARSERS): 1839 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1840 1841 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1842 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1843 1844 if self._match_text_seq("COMPOUND", "SORTKEY"): 1845 return self._parse_sortkey(compound=True) 1846 1847 if self._match_text_seq("SQL", "SECURITY"): 1848 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1849 1850 index = self._index 1851 key = self._parse_column() 1852 1853 if not self._match(TokenType.EQ): 1854 self._retreat(index) 1855 return self._parse_sequence_properties() 1856 1857 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1858 if isinstance(key, exp.Column): 1859 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1860 1861 value = self._parse_bitwise() or self._parse_var(any_token=True) 1862 1863 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1864 if isinstance(value, exp.Column): 1865 value = exp.var(value.name) 1866 1867 return self.expression(exp.Property, this=key, value=value) 1868 1869 def _parse_stored(self) -> exp.FileFormatProperty: 1870 self._match(TokenType.ALIAS) 1871 1872 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1873 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1874 1875 return self.expression( 1876 exp.FileFormatProperty, 1877 this=( 1878 self.expression( 1879 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1880 ) 1881 if 
input_format or output_format 1882 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1883 ), 1884 ) 1885 1886 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1887 field = self._parse_field() 1888 if isinstance(field, exp.Identifier) and not field.quoted: 1889 field = exp.var(field) 1890 1891 return field 1892 1893 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1894 self._match(TokenType.EQ) 1895 self._match(TokenType.ALIAS) 1896 1897 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1898 1899 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1900 properties = [] 1901 while True: 1902 if before: 1903 prop = self._parse_property_before() 1904 else: 1905 prop = self._parse_property() 1906 if not prop: 1907 break 1908 for p in ensure_list(prop): 1909 properties.append(p) 1910 1911 if properties: 1912 return self.expression(exp.Properties, expressions=properties) 1913 1914 return None 1915 1916 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1917 return self.expression( 1918 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1919 ) 1920 1921 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1922 if self._index >= 2: 1923 pre_volatile_token = self._tokens[self._index - 2] 1924 else: 1925 pre_volatile_token = None 1926 1927 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1928 return exp.VolatileProperty() 1929 1930 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1931 1932 def _parse_retention_period(self) -> exp.Var: 1933 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1934 number = self._parse_number() 1935 number_str = f"{number} " if number else "" 1936 unit = self._parse_var(any_token=True) 1937 return exp.var(f"{number_str}{unit}") 1938 1939 def _parse_system_versioning_property( 1940 self, with_: bool = False 1941 ) -> exp.WithSystemVersioningProperty: 1942 self._match(TokenType.EQ) 1943 prop = self.expression( 1944 exp.WithSystemVersioningProperty, 1945 **{ # type: ignore 1946 "on": True, 1947 "with": with_, 1948 }, 1949 ) 1950 1951 if self._match_text_seq("OFF"): 1952 prop.set("on", False) 1953 return prop 1954 1955 self._match(TokenType.ON) 1956 if self._match(TokenType.L_PAREN): 1957 while self._curr and not self._match(TokenType.R_PAREN): 1958 if self._match_text_seq("HISTORY_TABLE", "="): 1959 prop.set("this", self._parse_table_parts()) 1960 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1961 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1962 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1963 prop.set("retention_period", self._parse_retention_period()) 1964 1965 self._match(TokenType.COMMA) 1966 1967 return prop 1968 1969 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1970 self._match(TokenType.EQ) 1971 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1972 prop = self.expression(exp.DataDeletionProperty, on=on) 1973 1974 if self._match(TokenType.L_PAREN): 1975 while self._curr and not self._match(TokenType.R_PAREN): 1976 if self._match_text_seq("FILTER_COLUMN", "="): 1977 prop.set("filter_column", self._parse_column()) 1978 elif self._match_text_seq("RETENTION_PERIOD", "="): 1979 prop.set("retention_period", self._parse_retention_period()) 1980 1981 
self._match(TokenType.COMMA) 1982 1983 return prop 1984 1985 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1986 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1987 prop = self._parse_system_versioning_property(with_=True) 1988 self._match_r_paren() 1989 return prop 1990 1991 if self._match(TokenType.L_PAREN, advance=False): 1992 return self._parse_wrapped_properties() 1993 1994 if self._match_text_seq("JOURNAL"): 1995 return self._parse_withjournaltable() 1996 1997 if self._match_texts(self.VIEW_ATTRIBUTES): 1998 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1999 2000 if self._match_text_seq("DATA"): 2001 return self._parse_withdata(no=False) 2002 elif self._match_text_seq("NO", "DATA"): 2003 return self._parse_withdata(no=True) 2004 2005 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2006 return self._parse_serde_properties(with_=True) 2007 2008 if not self._next: 2009 return None 2010 2011 return self._parse_withisolatedloading() 2012 2013 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2014 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2015 self._match(TokenType.EQ) 2016 2017 user = self._parse_id_var() 2018 self._match(TokenType.PARAMETER) 2019 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2020 2021 if not user or not host: 2022 return None 2023 2024 return exp.DefinerProperty(this=f"{user}@{host}") 2025 2026 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2027 self._match(TokenType.TABLE) 2028 self._match(TokenType.EQ) 2029 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2030 2031 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2032 return self.expression(exp.LogProperty, no=no) 2033 2034 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2035 return self.expression(exp.JournalProperty, **kwargs) 2036 2037 def _parse_checksum(self) -> exp.ChecksumProperty: 2038 self._match(TokenType.EQ) 2039 2040 on = None 2041 if self._match(TokenType.ON): 2042 on = True 2043 elif self._match_text_seq("OFF"): 2044 on = False 2045 2046 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2047 2048 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2049 return self.expression( 2050 exp.Cluster, 2051 expressions=( 2052 self._parse_wrapped_csv(self._parse_ordered) 2053 if wrapped 2054 else self._parse_csv(self._parse_ordered) 2055 ), 2056 ) 2057 2058 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2059 self._match_text_seq("BY") 2060 2061 self._match_l_paren() 2062 expressions = self._parse_csv(self._parse_column) 2063 self._match_r_paren() 2064 2065 if self._match_text_seq("SORTED", "BY"): 2066 self._match_l_paren() 2067 sorted_by = self._parse_csv(self._parse_ordered) 2068 self._match_r_paren() 2069 else: 2070 sorted_by = None 2071 2072 self._match(TokenType.INTO) 2073 buckets = self._parse_number() 2074 self._match_text_seq("BUCKETS") 2075 2076 return self.expression( 2077 exp.ClusteredByProperty, 2078 expressions=expressions, 2079 sorted_by=sorted_by, 2080 buckets=buckets, 2081 ) 2082 2083 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2084 if not self._match_text_seq("GRANTS"): 2085 self._retreat(self._index - 1) 2086 return None 2087 2088 return self.expression(exp.CopyGrantsProperty) 2089 2090 def _parse_freespace(self) -> exp.FreespaceProperty: 2091 self._match(TokenType.EQ) 2092 return 
self.expression( 2093 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2094 ) 2095 2096 def _parse_mergeblockratio( 2097 self, no: bool = False, default: bool = False 2098 ) -> exp.MergeBlockRatioProperty: 2099 if self._match(TokenType.EQ): 2100 return self.expression( 2101 exp.MergeBlockRatioProperty, 2102 this=self._parse_number(), 2103 percent=self._match(TokenType.PERCENT), 2104 ) 2105 2106 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2107 2108 def _parse_datablocksize( 2109 self, 2110 default: t.Optional[bool] = None, 2111 minimum: t.Optional[bool] = None, 2112 maximum: t.Optional[bool] = None, 2113 ) -> exp.DataBlocksizeProperty: 2114 self._match(TokenType.EQ) 2115 size = self._parse_number() 2116 2117 units = None 2118 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2119 units = self._prev.text 2120 2121 return self.expression( 2122 exp.DataBlocksizeProperty, 2123 size=size, 2124 units=units, 2125 default=default, 2126 minimum=minimum, 2127 maximum=maximum, 2128 ) 2129 2130 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2131 self._match(TokenType.EQ) 2132 always = self._match_text_seq("ALWAYS") 2133 manual = self._match_text_seq("MANUAL") 2134 never = self._match_text_seq("NEVER") 2135 default = self._match_text_seq("DEFAULT") 2136 2137 autotemp = None 2138 if self._match_text_seq("AUTOTEMP"): 2139 autotemp = self._parse_schema() 2140 2141 return self.expression( 2142 exp.BlockCompressionProperty, 2143 always=always, 2144 manual=manual, 2145 never=never, 2146 default=default, 2147 autotemp=autotemp, 2148 ) 2149 2150 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2151 index = self._index 2152 no = self._match_text_seq("NO") 2153 concurrent = self._match_text_seq("CONCURRENT") 2154 2155 if not self._match_text_seq("ISOLATED", "LOADING"): 2156 self._retreat(index) 2157 return None 2158 2159 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2160 return self.expression( 2161 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2162 ) 2163 2164 def _parse_locking(self) -> exp.LockingProperty: 2165 if self._match(TokenType.TABLE): 2166 kind = "TABLE" 2167 elif self._match(TokenType.VIEW): 2168 kind = "VIEW" 2169 elif self._match(TokenType.ROW): 2170 kind = "ROW" 2171 elif self._match_text_seq("DATABASE"): 2172 kind = "DATABASE" 2173 else: 2174 kind = None 2175 2176 if kind in ("DATABASE", "TABLE", "VIEW"): 2177 this = self._parse_table_parts() 2178 else: 2179 this = None 2180 2181 if self._match(TokenType.FOR): 2182 for_or_in = "FOR" 2183 elif self._match(TokenType.IN): 2184 for_or_in = "IN" 2185 else: 2186 for_or_in = None 2187 2188 if self._match_text_seq("ACCESS"): 2189 lock_type = "ACCESS" 2190 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2191 lock_type = "EXCLUSIVE" 2192 elif self._match_text_seq("SHARE"): 2193 lock_type = "SHARE" 2194 elif self._match_text_seq("READ"): 2195 lock_type = "READ" 2196 elif self._match_text_seq("WRITE"): 2197 lock_type = "WRITE" 2198 elif self._match_text_seq("CHECKSUM"): 2199 lock_type = "CHECKSUM" 2200 else: 2201 lock_type = None 2202 2203 override = self._match_text_seq("OVERRIDE") 2204 2205 return self.expression( 2206 exp.LockingProperty, 2207 this=this, 2208 kind=kind, 2209 for_or_in=for_or_in, 2210 lock_type=lock_type, 2211 override=override, 2212 ) 2213 2214 def _parse_partition_by(self) -> t.List[exp.Expression]: 2215 if 
self._match(TokenType.PARTITION_BY): 2216 return self._parse_csv(self._parse_assignment) 2217 return [] 2218 2219 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2220 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2221 if self._match_text_seq("MINVALUE"): 2222 return exp.var("MINVALUE") 2223 if self._match_text_seq("MAXVALUE"): 2224 return exp.var("MAXVALUE") 2225 return self._parse_bitwise() 2226 2227 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2228 expression = None 2229 from_expressions = None 2230 to_expressions = None 2231 2232 if self._match(TokenType.IN): 2233 this = self._parse_wrapped_csv(self._parse_bitwise) 2234 elif self._match(TokenType.FROM): 2235 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2236 self._match_text_seq("TO") 2237 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2238 elif self._match_text_seq("WITH", "(", "MODULUS"): 2239 this = self._parse_number() 2240 self._match_text_seq(",", "REMAINDER") 2241 expression = self._parse_number() 2242 self._match_r_paren() 2243 else: 2244 self.raise_error("Failed to parse partition bound spec.") 2245 2246 return self.expression( 2247 exp.PartitionBoundSpec, 2248 this=this, 2249 expression=expression, 2250 from_expressions=from_expressions, 2251 to_expressions=to_expressions, 2252 ) 2253 2254 # https://www.postgresql.org/docs/current/sql-createtable.html 2255 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2256 if not self._match_text_seq("OF"): 2257 self._retreat(self._index - 1) 2258 return None 2259 2260 this = self._parse_table(schema=True) 2261 2262 if self._match(TokenType.DEFAULT): 2263 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2264 elif self._match_text_seq("FOR", "VALUES"): 2265 expression = self._parse_partition_bound_spec() 2266 else: 2267 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2268 2269 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2270 2271 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2272 self._match(TokenType.EQ) 2273 return self.expression( 2274 exp.PartitionedByProperty, 2275 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2276 ) 2277 2278 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2279 if self._match_text_seq("AND", "STATISTICS"): 2280 statistics = True 2281 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2282 statistics = False 2283 else: 2284 statistics = None 2285 2286 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2287 2288 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2289 if self._match_text_seq("SQL"): 2290 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2291 return None 2292 2293 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2294 if self._match_text_seq("SQL", "DATA"): 2295 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2296 return None 2297 2298 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2299 if self._match_text_seq("PRIMARY", "INDEX"): 2300 return exp.NoPrimaryIndexProperty() 2301 if self._match_text_seq("SQL"): 2302 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2303 return None 2304 2305 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2306 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2307 return exp.OnCommitProperty() 2308 
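# Illustrative statements exercised by these two branches (standard
# temporary-table syntax; editor's examples, not from the original source):
#   CREATE TEMPORARY TABLE t (x INT) ON COMMIT PRESERVE ROWS
#   CREATE TEMPORARY TABLE t (x INT) ON COMMIT DELETE ROWS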
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2309 return exp.OnCommitProperty(delete=True) 2310 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2311 2312 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2313 if self._match_text_seq("SQL", "DATA"): 2314 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2315 return None 2316 2317 def _parse_distkey(self) -> exp.DistKeyProperty: 2318 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2319 2320 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2321 table = self._parse_table(schema=True) 2322 2323 options = [] 2324 while self._match_texts(("INCLUDING", "EXCLUDING")): 2325 this = self._prev.text.upper() 2326 2327 id_var = self._parse_id_var() 2328 if not id_var: 2329 return None 2330 2331 options.append( 2332 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2333 ) 2334 2335 return self.expression(exp.LikeProperty, this=table, expressions=options) 2336 2337 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2338 return self.expression( 2339 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2340 ) 2341 2342 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2343 self._match(TokenType.EQ) 2344 return self.expression( 2345 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2346 ) 2347 2348 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2349 self._match_text_seq("WITH", "CONNECTION") 2350 return self.expression( 2351 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2352 ) 2353 2354 def _parse_returns(self) -> exp.ReturnsProperty: 2355 value: t.Optional[exp.Expression] 2356 null = None 2357 is_table = self._match(TokenType.TABLE) 2358 2359 if is_table: 2360 if self._match(TokenType.LT): 2361 value = self.expression( 2362 exp.Schema, 2363 this="TABLE", 2364 expressions=self._parse_csv(self._parse_struct_types), 2365 ) 2366 if not self._match(TokenType.GT): 2367 self.raise_error("Expecting >") 2368 else: 2369 value = self._parse_schema(exp.var("TABLE")) 2370 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2371 null = True 2372 value = None 2373 else: 2374 value = self._parse_types() 2375 2376 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2377 2378 def _parse_describe(self) -> exp.Describe: 2379 kind = self._match_set(self.CREATABLES) and self._prev.text 2380 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2381 if self._match(TokenType.DOT): 2382 style = None 2383 self._retreat(self._index - 2) 2384 this = self._parse_table(schema=True) 2385 properties = self._parse_properties() 2386 expressions = properties.expressions if properties else None 2387 return self.expression( 2388 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2389 ) 2390 2391 def _parse_insert(self) -> exp.Insert: 2392 comments = ensure_list(self._prev_comments) 2393 hint = self._parse_hint() 2394 overwrite = self._match(TokenType.OVERWRITE) 2395 ignore = self._match(TokenType.IGNORE) 2396 local = self._match_text_seq("LOCAL") 2397 alternative = None 2398 is_function = None 2399 2400 if self._match_text_seq("DIRECTORY"): 2401 this: t.Optional[exp.Expression] = self.expression( 2402 exp.Directory, 2403 this=self._parse_var_or_string(), 2404 
local=local, 2405 row_format=self._parse_row_format(match_row=True), 2406 ) 2407 else: 2408 if self._match(TokenType.OR): 2409 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2410 2411 self._match(TokenType.INTO) 2412 comments += ensure_list(self._prev_comments) 2413 self._match(TokenType.TABLE) 2414 is_function = self._match(TokenType.FUNCTION) 2415 2416 this = ( 2417 self._parse_table(schema=True, parse_partition=True) 2418 if not is_function 2419 else self._parse_function() 2420 ) 2421 2422 returning = self._parse_returning() 2423 2424 return self.expression( 2425 exp.Insert, 2426 comments=comments, 2427 hint=hint, 2428 is_function=is_function, 2429 this=this, 2430 stored=self._match_text_seq("STORED") and self._parse_stored(), 2431 by_name=self._match_text_seq("BY", "NAME"), 2432 exists=self._parse_exists(), 2433 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2434 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2435 conflict=self._parse_on_conflict(), 2436 returning=returning or self._parse_returning(), 2437 overwrite=overwrite, 2438 alternative=alternative, 2439 ignore=ignore, 2440 ) 2441 2442 def _parse_kill(self) -> exp.Kill: 2443 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2444 2445 return self.expression( 2446 exp.Kill, 2447 this=self._parse_primary(), 2448 kind=kind, 2449 ) 2450 2451 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2452 conflict = self._match_text_seq("ON", "CONFLICT") 2453 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2454 2455 if not conflict and not duplicate: 2456 return None 2457 2458 conflict_keys = None 2459 constraint = None 2460 2461 if conflict: 2462 if self._match_text_seq("ON", "CONSTRAINT"): 2463 constraint = self._parse_id_var() 2464 elif self._match(TokenType.L_PAREN): 2465 conflict_keys = self._parse_csv(self._parse_id_var) 2466 self._match_r_paren() 2467 2468 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2469 if self._prev.token_type == TokenType.UPDATE: 2470 self._match(TokenType.SET) 2471 expressions = self._parse_csv(self._parse_equality) 2472 else: 2473 expressions = None 2474 2475 return self.expression( 2476 exp.OnConflict, 2477 duplicate=duplicate, 2478 expressions=expressions, 2479 action=action, 2480 conflict_keys=conflict_keys, 2481 constraint=constraint, 2482 ) 2483 2484 def _parse_returning(self) -> t.Optional[exp.Returning]: 2485 if not self._match(TokenType.RETURNING): 2486 return None 2487 return self.expression( 2488 exp.Returning, 2489 expressions=self._parse_csv(self._parse_expression), 2490 into=self._match(TokenType.INTO) and self._parse_table_part(), 2491 ) 2492 2493 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2494 if not self._match(TokenType.FORMAT): 2495 return None 2496 return self._parse_row_format() 2497 2498 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2499 index = self._index 2500 with_ = with_ or self._match_text_seq("WITH") 2501 2502 if not self._match(TokenType.SERDE_PROPERTIES): 2503 self._retreat(index) 2504 return None 2505 return self.expression( 2506 exp.SerdeProperties, 2507 **{ # type: ignore 2508 "expressions": self._parse_wrapped_properties(), 2509 "with": with_, 2510 }, 2511 ) 2512 2513 def _parse_row_format( 2514 self, match_row: bool = False 2515 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2516 
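# Sketch of the Hive-style clauses handled below (illustrative SQL, not
# taken from the original source):
#   ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
#   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
# The SERDE branch yields a RowFormatSerdeProperty; the DELIMITED branch
# collects the TERMINATED BY / ESCAPED BY / NULL DEFINED AS options into a
# RowFormatDelimitedProperty.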
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2517 return None 2518 2519 if self._match_text_seq("SERDE"): 2520 this = self._parse_string() 2521 2522 serde_properties = self._parse_serde_properties() 2523 2524 return self.expression( 2525 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2526 ) 2527 2528 self._match_text_seq("DELIMITED") 2529 2530 kwargs = {} 2531 2532 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2533 kwargs["fields"] = self._parse_string() 2534 if self._match_text_seq("ESCAPED", "BY"): 2535 kwargs["escaped"] = self._parse_string() 2536 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2537 kwargs["collection_items"] = self._parse_string() 2538 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2539 kwargs["map_keys"] = self._parse_string() 2540 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2541 kwargs["lines"] = self._parse_string() 2542 if self._match_text_seq("NULL", "DEFINED", "AS"): 2543 kwargs["null"] = self._parse_string() 2544 2545 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2546 2547 def _parse_load(self) -> exp.LoadData | exp.Command: 2548 if self._match_text_seq("DATA"): 2549 local = self._match_text_seq("LOCAL") 2550 self._match_text_seq("INPATH") 2551 inpath = self._parse_string() 2552 overwrite = self._match(TokenType.OVERWRITE) 2553 self._match_pair(TokenType.INTO, TokenType.TABLE) 2554 2555 return self.expression( 2556 exp.LoadData, 2557 this=self._parse_table(schema=True), 2558 local=local, 2559 overwrite=overwrite, 2560 inpath=inpath, 2561 partition=self._parse_partition(), 2562 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2563 serde=self._match_text_seq("SERDE") and self._parse_string(), 2564 ) 2565 return self._parse_as_command(self._prev) 2566 2567 def _parse_delete(self) -> exp.Delete: 2568 # This handles MySQL's "Multiple-Table Syntax" 2569 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2570 tables = None 2571 comments = self._prev_comments 2572 if not self._match(TokenType.FROM, advance=False): 2573 tables = self._parse_csv(self._parse_table) or None 2574 2575 returning = self._parse_returning() 2576 2577 return self.expression( 2578 exp.Delete, 2579 comments=comments, 2580 tables=tables, 2581 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2582 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2583 where=self._parse_where(), 2584 returning=returning or self._parse_returning(), 2585 limit=self._parse_limit(), 2586 ) 2587 2588 def _parse_update(self) -> exp.Update: 2589 comments = self._prev_comments 2590 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2591 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2592 returning = self._parse_returning() 2593 return self.expression( 2594 exp.Update, 2595 comments=comments, 2596 **{ # type: ignore 2597 "this": this, 2598 "expressions": expressions, 2599 "from": self._parse_from(joins=True), 2600 "where": self._parse_where(), 2601 "returning": returning or self._parse_returning(), 2602 "order": self._parse_order(), 2603 "limit": self._parse_limit(), 2604 }, 2605 ) 2606 2607 def _parse_uncache(self) -> exp.Uncache: 2608 if not self._match(TokenType.TABLE): 2609 self.raise_error("Expecting TABLE after UNCACHE") 2610 2611 return self.expression( 2612 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2613 ) 2614 2615 def 
_parse_cache(self) -> exp.Cache: 2616 lazy = self._match_text_seq("LAZY") 2617 self._match(TokenType.TABLE) 2618 table = self._parse_table(schema=True) 2619 2620 options = [] 2621 if self._match_text_seq("OPTIONS"): 2622 self._match_l_paren() 2623 k = self._parse_string() 2624 self._match(TokenType.EQ) 2625 v = self._parse_string() 2626 options = [k, v] 2627 self._match_r_paren() 2628 2629 self._match(TokenType.ALIAS) 2630 return self.expression( 2631 exp.Cache, 2632 this=table, 2633 lazy=lazy, 2634 options=options, 2635 expression=self._parse_select(nested=True), 2636 ) 2637 2638 def _parse_partition(self) -> t.Optional[exp.Partition]: 2639 if not self._match(TokenType.PARTITION): 2640 return None 2641 2642 return self.expression( 2643 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2644 ) 2645 2646 def _parse_value(self) -> t.Optional[exp.Tuple]: 2647 if self._match(TokenType.L_PAREN): 2648 expressions = self._parse_csv(self._parse_expression) 2649 self._match_r_paren() 2650 return self.expression(exp.Tuple, expressions=expressions) 2651 2652 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2653 expression = self._parse_expression() 2654 if expression: 2655 return self.expression(exp.Tuple, expressions=[expression]) 2656 return None 2657 2658 def _parse_projections(self) -> t.List[exp.Expression]: 2659 return self._parse_expressions() 2660 2661 def _parse_select( 2662 self, 2663 nested: bool = False, 2664 table: bool = False, 2665 parse_subquery_alias: bool = True, 2666 parse_set_operation: bool = True, 2667 ) -> t.Optional[exp.Expression]: 2668 cte = self._parse_with() 2669 2670 if cte: 2671 this = self._parse_statement() 2672 2673 if not this: 2674 self.raise_error("Failed to parse any statement following CTE") 2675 return cte 2676 2677 if "with" in this.arg_types: 2678 this.set("with", cte) 2679 else: 2680 self.raise_error(f"{this.key} does not support CTE") 2681 this = cte 2682 2683 return this 2684 2685 # duckdb supports leading with FROM x 2686 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2687 2688 if self._match(TokenType.SELECT): 2689 comments = self._prev_comments 2690 2691 hint = self._parse_hint() 2692 all_ = self._match(TokenType.ALL) 2693 distinct = self._match_set(self.DISTINCT_TOKENS) 2694 2695 kind = ( 2696 self._match(TokenType.ALIAS) 2697 and self._match_texts(("STRUCT", "VALUE")) 2698 and self._prev.text.upper() 2699 ) 2700 2701 if distinct: 2702 distinct = self.expression( 2703 exp.Distinct, 2704 on=self._parse_value() if self._match(TokenType.ON) else None, 2705 ) 2706 2707 if all_ and distinct: 2708 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2709 2710 limit = self._parse_limit(top=True) 2711 projections = self._parse_projections() 2712 2713 this = self.expression( 2714 exp.Select, 2715 kind=kind, 2716 hint=hint, 2717 distinct=distinct, 2718 expressions=projections, 2719 limit=limit, 2720 ) 2721 this.comments = comments 2722 2723 into = self._parse_into() 2724 if into: 2725 this.set("into", into) 2726 2727 if not from_: 2728 from_ = self._parse_from() 2729 2730 if from_: 2731 this.set("from", from_) 2732 2733 this = self._parse_query_modifiers(this) 2734 elif (table or nested) and self._match(TokenType.L_PAREN): 2735 if self._match(TokenType.PIVOT): 2736 this = self._parse_simplified_pivot() 2737 elif self._match(TokenType.FROM): 2738 this = exp.select("*").from_( 2739 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2740 ) 2741 else: 2742 
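# A parenthesized source that is neither a PIVOT nor a leading FROM:
# parse it as a plain table when `table` is set, otherwise as a nested
# select whose set operations are attached by _parse_set_operations below.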
this = ( 2743 self._parse_table() 2744 if table 2745 else self._parse_select(nested=True, parse_set_operation=False) 2746 ) 2747 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2748 2749 self._match_r_paren() 2750 2751 # We return early here so that the UNION isn't attached to the subquery by the 2752 # following call to _parse_set_operations, but instead becomes the parent node 2753 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2754 elif self._match(TokenType.VALUES, advance=False): 2755 this = self._parse_derived_table_values() 2756 elif from_: 2757 this = exp.select("*").from_(from_.this, copy=False) 2758 else: 2759 this = None 2760 2761 if parse_set_operation: 2762 return self._parse_set_operations(this) 2763 return this 2764 2765 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2766 if not skip_with_token and not self._match(TokenType.WITH): 2767 return None 2768 2769 comments = self._prev_comments 2770 recursive = self._match(TokenType.RECURSIVE) 2771 2772 expressions = [] 2773 while True: 2774 expressions.append(self._parse_cte()) 2775 2776 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2777 break 2778 else: 2779 self._match(TokenType.WITH) 2780 2781 return self.expression( 2782 exp.With, comments=comments, expressions=expressions, recursive=recursive 2783 ) 2784 2785 def _parse_cte(self) -> exp.CTE: 2786 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2787 if not alias or not alias.this: 2788 self.raise_error("Expected CTE to have alias") 2789 2790 self._match(TokenType.ALIAS) 2791 2792 if self._match_text_seq("NOT", "MATERIALIZED"): 2793 materialized = False 2794 elif self._match_text_seq("MATERIALIZED"): 2795 materialized = True 2796 else: 2797 materialized = None 2798 2799 return self.expression( 2800 exp.CTE, 2801 this=self._parse_wrapped(self._parse_statement), 2802 alias=alias, 2803 materialized=materialized, 2804 ) 2805 2806 def _parse_table_alias( 2807 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2808 ) -> t.Optional[exp.TableAlias]: 2809 any_token = self._match(TokenType.ALIAS) 2810 alias = ( 2811 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2812 or self._parse_string_as_identifier() 2813 ) 2814 2815 index = self._index 2816 if self._match(TokenType.L_PAREN): 2817 columns = self._parse_csv(self._parse_function_parameter) 2818 self._match_r_paren() if columns else self._retreat(index) 2819 else: 2820 columns = None 2821 2822 if not alias and not columns: 2823 return None 2824 2825 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2826 2827 # We bubble up comments from the Identifier to the TableAlias 2828 if isinstance(alias, exp.Identifier): 2829 table_alias.add_comments(alias.pop_comments()) 2830 2831 return table_alias 2832 2833 def _parse_subquery( 2834 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2835 ) -> t.Optional[exp.Subquery]: 2836 if not this: 2837 return None 2838 2839 return self.expression( 2840 exp.Subquery, 2841 this=this, 2842 pivots=self._parse_pivots(), 2843 alias=self._parse_table_alias() if parse_alias else None, 2844 ) 2845 2846 def _implicit_unnests_to_explicit(self, this: E) -> E: 2847 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2848 2849 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2850 for i, join in enumerate(this.args.get("joins") or []): 2851 table = join.this 2852 
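# Compare a normalized copy of the joined name against the refs collected
# so far; the copy is flagged as "maybe_column" because `x.y` in this
# position may actually refer to a column of a previously seen table.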
normalized_table = table.copy() 2853 normalized_table.meta["maybe_column"] = True 2854 normalized_table = _norm(normalized_table, dialect=self.dialect) 2855 2856 if isinstance(table, exp.Table) and not join.args.get("on"): 2857 if normalized_table.parts[0].name in refs: 2858 table_as_column = table.to_column() 2859 unnest = exp.Unnest(expressions=[table_as_column]) 2860 2861 # Table.to_column creates a parent Alias node that we want to convert to 2862 # a TableAlias and attach to the Unnest, so it matches the parser's output 2863 if isinstance(table.args.get("alias"), exp.TableAlias): 2864 table_as_column.replace(table_as_column.this) 2865 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2866 2867 table.replace(unnest) 2868 2869 refs.add(normalized_table.alias_or_name) 2870 2871 return this 2872 2873 def _parse_query_modifiers( 2874 self, this: t.Optional[exp.Expression] 2875 ) -> t.Optional[exp.Expression]: 2876 if isinstance(this, (exp.Query, exp.Table)): 2877 for join in self._parse_joins(): 2878 this.append("joins", join) 2879 for lateral in iter(self._parse_lateral, None): 2880 this.append("laterals", lateral) 2881 2882 while True: 2883 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2884 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2885 key, expression = parser(self) 2886 2887 if expression: 2888 this.set(key, expression) 2889 if key == "limit": 2890 offset = expression.args.pop("offset", None) 2891 2892 if offset: 2893 offset = exp.Offset(expression=offset) 2894 this.set("offset", offset) 2895 2896 limit_by_expressions = expression.expressions 2897 expression.set("expressions", None) 2898 offset.set("expressions", limit_by_expressions) 2899 continue 2900 break 2901 2902 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2903 this = self._implicit_unnests_to_explicit(this) 2904 2905 return this 2906 2907 def _parse_hint(self) -> t.Optional[exp.Hint]: 2908 if self._match(TokenType.HINT): 2909 hints = [] 2910 for hint in iter( 2911 lambda: self._parse_csv( 2912 lambda: self._parse_function() or self._parse_var(upper=True) 2913 ), 2914 [], 2915 ): 2916 hints.extend(hint) 2917 2918 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2919 self.raise_error("Expected */ after HINT") 2920 2921 return self.expression(exp.Hint, expressions=hints) 2922 2923 return None 2924 2925 def _parse_into(self) -> t.Optional[exp.Into]: 2926 if not self._match(TokenType.INTO): 2927 return None 2928 2929 temp = self._match(TokenType.TEMPORARY) 2930 unlogged = self._match_text_seq("UNLOGGED") 2931 self._match(TokenType.TABLE) 2932 2933 return self.expression( 2934 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2935 ) 2936 2937 def _parse_from( 2938 self, joins: bool = False, skip_from_token: bool = False 2939 ) -> t.Optional[exp.From]: 2940 if not skip_from_token and not self._match(TokenType.FROM): 2941 return None 2942 2943 return self.expression( 2944 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2945 ) 2946 2947 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2948 return self.expression( 2949 exp.MatchRecognizeMeasure, 2950 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2951 this=self._parse_expression(), 2952 ) 2953 2954 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2955 if not self._match(TokenType.MATCH_RECOGNIZE): 2956 return None 2957 2958 self._match_l_paren() 2959 2960 partition = 
self._parse_partition_by() 2961 order = self._parse_order() 2962 2963 measures = ( 2964 self._parse_csv(self._parse_match_recognize_measure) 2965 if self._match_text_seq("MEASURES") 2966 else None 2967 ) 2968 2969 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2970 rows = exp.var("ONE ROW PER MATCH") 2971 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2972 text = "ALL ROWS PER MATCH" 2973 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2974 text += " SHOW EMPTY MATCHES" 2975 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2976 text += " OMIT EMPTY MATCHES" 2977 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2978 text += " WITH UNMATCHED ROWS" 2979 rows = exp.var(text) 2980 else: 2981 rows = None 2982 2983 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2984 text = "AFTER MATCH SKIP" 2985 if self._match_text_seq("PAST", "LAST", "ROW"): 2986 text += " PAST LAST ROW" 2987 elif self._match_text_seq("TO", "NEXT", "ROW"): 2988 text += " TO NEXT ROW" 2989 elif self._match_text_seq("TO", "FIRST"): 2990 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2991 elif self._match_text_seq("TO", "LAST"): 2992 text += f" TO LAST {self._advance_any().text}" # type: ignore 2993 after = exp.var(text) 2994 else: 2995 after = None 2996 2997 if self._match_text_seq("PATTERN"): 2998 self._match_l_paren() 2999 3000 if not self._curr: 3001 self.raise_error("Expecting )", self._curr) 3002 3003 paren = 1 3004 start = self._curr 3005 3006 while self._curr and paren > 0: 3007 if self._curr.token_type == TokenType.L_PAREN: 3008 paren += 1 3009 if self._curr.token_type == TokenType.R_PAREN: 3010 paren -= 1 3011 3012 end = self._prev 3013 self._advance() 3014 3015 if paren > 0: 3016 self.raise_error("Expecting )", self._curr) 3017 3018 pattern = exp.var(self._find_sql(start, end)) 3019 else: 3020 pattern = None 3021 3022 define = ( 3023 self._parse_csv(self._parse_name_as_expression) 3024 if self._match_text_seq("DEFINE") 3025 else None 3026 ) 3027 3028 self._match_r_paren() 3029 3030 return self.expression( 3031 exp.MatchRecognize, 3032 partition_by=partition, 3033 order=order, 3034 measures=measures, 3035 rows=rows, 3036 after=after, 3037 pattern=pattern, 3038 define=define, 3039 alias=self._parse_table_alias(), 3040 ) 3041 3042 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3043 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3044 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3045 cross_apply = False 3046 3047 if cross_apply is not None: 3048 this = self._parse_select(table=True) 3049 view = None 3050 outer = None 3051 elif self._match(TokenType.LATERAL): 3052 this = self._parse_select(table=True) 3053 view = self._match(TokenType.VIEW) 3054 outer = self._match(TokenType.OUTER) 3055 else: 3056 return None 3057 3058 if not this: 3059 this = ( 3060 self._parse_unnest() 3061 or self._parse_function() 3062 or self._parse_id_var(any_token=False) 3063 ) 3064 3065 while self._match(TokenType.DOT): 3066 this = exp.Dot( 3067 this=this, 3068 expression=self._parse_function() or self._parse_id_var(any_token=False), 3069 ) 3070 3071 if view: 3072 table = self._parse_id_var(any_token=False) 3073 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3074 table_alias: t.Optional[exp.TableAlias] = self.expression( 3075 exp.TableAlias, this=table, columns=columns 3076 ) 3077 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3078 # We move the alias from the lateral's child node to 
the lateral itself 3079 table_alias = this.args["alias"].pop() 3080 else: 3081 table_alias = self._parse_table_alias() 3082 3083 return self.expression( 3084 exp.Lateral, 3085 this=this, 3086 view=view, 3087 outer=outer, 3088 alias=table_alias, 3089 cross_apply=cross_apply, 3090 ) 3091 3092 def _parse_join_parts( 3093 self, 3094 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3095 return ( 3096 self._match_set(self.JOIN_METHODS) and self._prev, 3097 self._match_set(self.JOIN_SIDES) and self._prev, 3098 self._match_set(self.JOIN_KINDS) and self._prev, 3099 ) 3100 3101 def _parse_join( 3102 self, skip_join_token: bool = False, parse_bracket: bool = False 3103 ) -> t.Optional[exp.Join]: 3104 if self._match(TokenType.COMMA): 3105 return self.expression(exp.Join, this=self._parse_table()) 3106 3107 index = self._index 3108 method, side, kind = self._parse_join_parts() 3109 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3110 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3111 3112 if not skip_join_token and not join: 3113 self._retreat(index) 3114 kind = None 3115 method = None 3116 side = None 3117 3118 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3119 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3120 3121 if not skip_join_token and not join and not outer_apply and not cross_apply: 3122 return None 3123 3124 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3125 3126 if method: 3127 kwargs["method"] = method.text 3128 if side: 3129 kwargs["side"] = side.text 3130 if kind: 3131 kwargs["kind"] = kind.text 3132 if hint: 3133 kwargs["hint"] = hint 3134 3135 if self._match(TokenType.MATCH_CONDITION): 3136 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3137 3138 if self._match(TokenType.ON): 3139 kwargs["on"] = self._parse_assignment() 3140 elif self._match(TokenType.USING): 3141 kwargs["using"] = self._parse_wrapped_id_vars() 3142 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3143 kind and kind.token_type == TokenType.CROSS 3144 ): 3145 index = self._index 3146 joins: t.Optional[list] = list(self._parse_joins()) 3147 3148 if joins and self._match(TokenType.ON): 3149 kwargs["on"] = self._parse_assignment() 3150 elif joins and self._match(TokenType.USING): 3151 kwargs["using"] = self._parse_wrapped_id_vars() 3152 else: 3153 joins = None 3154 self._retreat(index) 3155 3156 kwargs["this"].set("joins", joins if joins else None) 3157 3158 comments = [c for token in (method, side, kind) if token for c in token.comments] 3159 return self.expression(exp.Join, comments=comments, **kwargs) 3160 3161 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3162 this = self._parse_assignment() 3163 3164 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3165 return this 3166 3167 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3168 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3169 3170 return this 3171 3172 def _parse_index_params(self) -> exp.IndexParameters: 3173 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3174 3175 if self._match(TokenType.L_PAREN, advance=False): 3176 columns = self._parse_wrapped_csv(self._parse_with_operator) 3177 else: 3178 columns = None 3179 3180 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3181 partition_by = 
self._parse_partition_by() 3182 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3183 tablespace = ( 3184 self._parse_var(any_token=True) 3185 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3186 else None 3187 ) 3188 where = self._parse_where() 3189 3190 return self.expression( 3191 exp.IndexParameters, 3192 using=using, 3193 columns=columns, 3194 include=include, 3195 partition_by=partition_by, 3196 where=where, 3197 with_storage=with_storage, 3198 tablespace=tablespace, 3199 ) 3200 3201 def _parse_index( 3202 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3203 ) -> t.Optional[exp.Index]: 3204 if index or anonymous: 3205 unique = None 3206 primary = None 3207 amp = None 3208 3209 self._match(TokenType.ON) 3210 self._match(TokenType.TABLE) # hive 3211 table = self._parse_table_parts(schema=True) 3212 else: 3213 unique = self._match(TokenType.UNIQUE) 3214 primary = self._match_text_seq("PRIMARY") 3215 amp = self._match_text_seq("AMP") 3216 3217 if not self._match(TokenType.INDEX): 3218 return None 3219 3220 index = self._parse_id_var() 3221 table = None 3222 3223 params = self._parse_index_params() 3224 3225 return self.expression( 3226 exp.Index, 3227 this=index, 3228 table=table, 3229 unique=unique, 3230 primary=primary, 3231 amp=amp, 3232 params=params, 3233 ) 3234 3235 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3236 hints: t.List[exp.Expression] = [] 3237 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3238 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3239 hints.append( 3240 self.expression( 3241 exp.WithTableHint, 3242 expressions=self._parse_csv( 3243 lambda: self._parse_function() or self._parse_var(any_token=True) 3244 ), 3245 ) 3246 ) 3247 self._match_r_paren() 3248 else: 3249 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3250 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3251 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3252 3253 self._match_set((TokenType.INDEX, TokenType.KEY)) 3254 if self._match(TokenType.FOR): 3255 hint.set("target", self._advance_any() and self._prev.text.upper()) 3256 3257 hint.set("expressions", self._parse_wrapped_id_vars()) 3258 hints.append(hint) 3259 3260 return hints or None 3261 3262 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3263 return ( 3264 (not schema and self._parse_function(optional_parens=False)) 3265 or self._parse_id_var(any_token=False) 3266 or self._parse_string_as_identifier() 3267 or self._parse_placeholder() 3268 ) 3269 3270 def _parse_table_parts( 3271 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3272 ) -> exp.Table: 3273 catalog = None 3274 db = None 3275 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3276 3277 while self._match(TokenType.DOT): 3278 if catalog: 3279 # This allows nesting the table in arbitrarily many dot expressions if needed 3280 table = self.expression( 3281 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3282 ) 3283 else: 3284 catalog = db 3285 db = table 3286 # "" used for tsql FROM a..b case 3287 table = self._parse_table_part(schema=schema) or "" 3288 3289 if ( 3290 wildcard 3291 and self._is_connected() 3292 and (isinstance(table, exp.Identifier) or not table) 3293 and self._match(TokenType.STAR) 3294 ): 3295 if isinstance(table, exp.Identifier): 3296 table.args["this"] += "*" 3297 else: 3298 table = 
exp.Identifier(this="*") 3299 3300 # We bubble up comments from the Identifier to the Table 3301 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3302 3303 if is_db_reference: 3304 catalog = db 3305 db = table 3306 table = None 3307 3308 if not table and not is_db_reference: 3309 self.raise_error(f"Expected table name but got {self._curr}") 3310 if not db and is_db_reference: 3311 self.raise_error(f"Expected database name but got {self._curr}") 3312 3313 return self.expression( 3314 exp.Table, 3315 comments=comments, 3316 this=table, 3317 db=db, 3318 catalog=catalog, 3319 pivots=self._parse_pivots(), 3320 ) 3321 3322 def _parse_table( 3323 self, 3324 schema: bool = False, 3325 joins: bool = False, 3326 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3327 parse_bracket: bool = False, 3328 is_db_reference: bool = False, 3329 parse_partition: bool = False, 3330 ) -> t.Optional[exp.Expression]: 3331 lateral = self._parse_lateral() 3332 if lateral: 3333 return lateral 3334 3335 unnest = self._parse_unnest() 3336 if unnest: 3337 return unnest 3338 3339 values = self._parse_derived_table_values() 3340 if values: 3341 return values 3342 3343 subquery = self._parse_select(table=True) 3344 if subquery: 3345 if not subquery.args.get("pivots"): 3346 subquery.set("pivots", self._parse_pivots()) 3347 return subquery 3348 3349 bracket = parse_bracket and self._parse_bracket(None) 3350 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3351 3352 only = self._match(TokenType.ONLY) 3353 3354 this = t.cast( 3355 exp.Expression, 3356 bracket 3357 or self._parse_bracket( 3358 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3359 ), 3360 ) 3361 3362 if only: 3363 this.set("only", only) 3364 3365 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3366 self._match_text_seq("*") 3367 3368 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3369 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3370 this.set("partition", self._parse_partition()) 3371 3372 if schema: 3373 return self._parse_schema(this=this) 3374 3375 version = self._parse_version() 3376 3377 if version: 3378 this.set("version", version) 3379 3380 if self.dialect.ALIAS_POST_TABLESAMPLE: 3381 table_sample = self._parse_table_sample() 3382 3383 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3384 if alias: 3385 this.set("alias", alias) 3386 3387 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3388 return self.expression( 3389 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3390 ) 3391 3392 this.set("hints", self._parse_table_hints()) 3393 3394 if not this.args.get("pivots"): 3395 this.set("pivots", self._parse_pivots()) 3396 3397 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3398 table_sample = self._parse_table_sample() 3399 3400 if table_sample: 3401 table_sample.set("this", this) 3402 this = table_sample 3403 3404 if joins: 3405 for join in self._parse_joins(): 3406 this.append("joins", join) 3407 3408 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3409 this.set("ordinality", True) 3410 this.set("alias", self._parse_table_alias()) 3411 3412 return this 3413 3414 def _parse_version(self) -> t.Optional[exp.Version]: 3415 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3416 this = "TIMESTAMP" 3417 elif self._match(TokenType.VERSION_SNAPSHOT): 3418 this = "VERSION" 3419 else: 3420 return None 3421 3422 
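# Illustrative time-travel forms mapped by the branches below (keywords
# vary by dialect; editor's examples, not from the original source):
#   FOR TIMESTAMP AS OF <expr>           -> kind "AS OF"
#   FOR VERSION FROM <x> TO <y>          -> kind "FROM" with Tuple(start, end)
#   FOR SYSTEM_TIME CONTAINED IN (...)   -> kind "CONTAINED IN"
#   FOR SYSTEM_TIME ALL                  -> kind "ALL"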
if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3423 kind = self._prev.text.upper() 3424 start = self._parse_bitwise() 3425 self._match_texts(("TO", "AND")) 3426 end = self._parse_bitwise() 3427 expression: t.Optional[exp.Expression] = self.expression( 3428 exp.Tuple, expressions=[start, end] 3429 ) 3430 elif self._match_text_seq("CONTAINED", "IN"): 3431 kind = "CONTAINED IN" 3432 expression = self.expression( 3433 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3434 ) 3435 elif self._match(TokenType.ALL): 3436 kind = "ALL" 3437 expression = None 3438 else: 3439 self._match_text_seq("AS", "OF") 3440 kind = "AS OF" 3441 expression = self._parse_type() 3442 3443 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3444 3445 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3446 if not self._match(TokenType.UNNEST): 3447 return None 3448 3449 expressions = self._parse_wrapped_csv(self._parse_equality) 3450 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3451 3452 alias = self._parse_table_alias() if with_alias else None 3453 3454 if alias: 3455 if self.dialect.UNNEST_COLUMN_ONLY: 3456 if alias.args.get("columns"): 3457 self.raise_error("Unexpected extra column alias in unnest.") 3458 3459 alias.set("columns", [alias.this]) 3460 alias.set("this", None) 3461 3462 columns = alias.args.get("columns") or [] 3463 if offset and len(expressions) < len(columns): 3464 offset = columns.pop() 3465 3466 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3467 self._match(TokenType.ALIAS) 3468 offset = self._parse_id_var( 3469 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3470 ) or exp.to_identifier("offset") 3471 3472 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3473 3474 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3475 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3476 if not is_derived and not self._match_text_seq("VALUES"): 3477 return None 3478 3479 expressions = self._parse_csv(self._parse_value) 3480 alias = self._parse_table_alias() 3481 3482 if is_derived: 3483 self._match_r_paren() 3484 3485 return self.expression( 3486 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3487 ) 3488 3489 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3490 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3491 as_modifier and self._match_text_seq("USING", "SAMPLE") 3492 ): 3493 return None 3494 3495 bucket_numerator = None 3496 bucket_denominator = None 3497 bucket_field = None 3498 percent = None 3499 size = None 3500 seed = None 3501 3502 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3503 matched_l_paren = self._match(TokenType.L_PAREN) 3504 3505 if self.TABLESAMPLE_CSV: 3506 num = None 3507 expressions = self._parse_csv(self._parse_primary) 3508 else: 3509 expressions = None 3510 num = ( 3511 self._parse_factor() 3512 if self._match(TokenType.NUMBER, advance=False) 3513 else self._parse_primary() or self._parse_placeholder() 3514 ) 3515 3516 if self._match_text_seq("BUCKET"): 3517 bucket_numerator = self._parse_number() 3518 self._match_text_seq("OUT", "OF") 3519 bucket_denominator = self._parse_number() 3520 self._match(TokenType.ON) 3521 bucket_field = self._parse_field() 3522 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3523 percent = num 3524 elif self._match(TokenType.ROWS) or 
not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3525 size = num 3526 else: 3527 percent = num 3528 3529 if matched_l_paren: 3530 self._match_r_paren() 3531 3532 if self._match(TokenType.L_PAREN): 3533 method = self._parse_var(upper=True) 3534 seed = self._match(TokenType.COMMA) and self._parse_number() 3535 self._match_r_paren() 3536 elif self._match_texts(("SEED", "REPEATABLE")): 3537 seed = self._parse_wrapped(self._parse_number) 3538 3539 if not method and self.DEFAULT_SAMPLING_METHOD: 3540 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3541 3542 return self.expression( 3543 exp.TableSample, 3544 expressions=expressions, 3545 method=method, 3546 bucket_numerator=bucket_numerator, 3547 bucket_denominator=bucket_denominator, 3548 bucket_field=bucket_field, 3549 percent=percent, 3550 size=size, 3551 seed=seed, 3552 ) 3553 3554 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3555 return list(iter(self._parse_pivot, None)) or None 3556 3557 def _parse_joins(self) -> t.Iterator[exp.Join]: 3558 return iter(self._parse_join, None) 3559 3560 # https://duckdb.org/docs/sql/statements/pivot 3561 def _parse_simplified_pivot(self) -> exp.Pivot: 3562 def _parse_on() -> t.Optional[exp.Expression]: 3563 this = self._parse_bitwise() 3564 return self._parse_in(this) if self._match(TokenType.IN) else this 3565 3566 this = self._parse_table() 3567 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3568 using = self._match(TokenType.USING) and self._parse_csv( 3569 lambda: self._parse_alias(self._parse_function()) 3570 ) 3571 group = self._parse_group() 3572 return self.expression( 3573 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3574 ) 3575 3576 def _parse_pivot_in(self) -> exp.In: 3577 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3578 this = self._parse_assignment() 3579 3580 self._match(TokenType.ALIAS) 3581 alias = self._parse_field() 3582 if alias: 3583 return self.expression(exp.PivotAlias, this=this, alias=alias) 3584 3585 return this 3586 3587 value = self._parse_column() 3588 3589 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3590 self.raise_error("Expecting IN (") 3591 3592 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3593 3594 self._match_r_paren() 3595 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3596 3597 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3598 index = self._index 3599 include_nulls = None 3600 3601 if self._match(TokenType.PIVOT): 3602 unpivot = False 3603 elif self._match(TokenType.UNPIVOT): 3604 unpivot = True 3605 3606 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3607 if self._match_text_seq("INCLUDE", "NULLS"): 3608 include_nulls = True 3609 elif self._match_text_seq("EXCLUDE", "NULLS"): 3610 include_nulls = False 3611 else: 3612 return None 3613 3614 expressions = [] 3615 3616 if not self._match(TokenType.L_PAREN): 3617 self._retreat(index) 3618 return None 3619 3620 if unpivot: 3621 expressions = self._parse_csv(self._parse_column) 3622 else: 3623 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3624 3625 if not expressions: 3626 self.raise_error("Failed to parse PIVOT's aggregation list") 3627 3628 if not self._match(TokenType.FOR): 3629 self.raise_error("Expecting FOR") 3630 3631 field = self._parse_pivot_in() 3632 3633 self._match_r_paren() 3634 3635 pivot = self.expression( 3636 exp.Pivot, 3637 expressions=expressions, 3638 field=field, 3639 
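# `field` is the exp.In produced by _parse_pivot_in above; e.g. for
# PIVOT(SUM(v) FOR col IN (1 AS a, 2 AS b)) it holds `col` and the aliased pivot values.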
unpivot=unpivot, 3640 include_nulls=include_nulls, 3641 ) 3642 3643 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3644 pivot.set("alias", self._parse_table_alias()) 3645 3646 if not unpivot: 3647 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3648 3649 columns: t.List[exp.Expression] = [] 3650 for fld in pivot.args["field"].expressions: 3651 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3652 for name in names: 3653 if self.PREFIXED_PIVOT_COLUMNS: 3654 name = f"{name}_{field_name}" if name else field_name 3655 else: 3656 name = f"{field_name}_{name}" if name else field_name 3657 3658 columns.append(exp.to_identifier(name)) 3659 3660 pivot.set("columns", columns) 3661 3662 return pivot 3663 3664 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3665 return [agg.alias for agg in aggregations] 3666 3667 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3668 if not skip_where_token and not self._match(TokenType.PREWHERE): 3669 return None 3670 3671 return self.expression( 3672 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3673 ) 3674 3675 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3676 if not skip_where_token and not self._match(TokenType.WHERE): 3677 return None 3678 3679 return self.expression( 3680 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3681 ) 3682 3683 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3684 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3685 return None 3686 3687 elements: t.Dict[str, t.Any] = defaultdict(list) 3688 3689 if self._match(TokenType.ALL): 3690 elements["all"] = True 3691 elif self._match(TokenType.DISTINCT): 3692 elements["all"] = False 3693 3694 while True: 3695 expressions = self._parse_csv( 3696 lambda: None 3697 if self._match(TokenType.ROLLUP, advance=False) 3698 else self._parse_assignment() 3699 ) 3700 if expressions: 3701 elements["expressions"].extend(expressions) 3702 3703 grouping_sets = self._parse_grouping_sets() 3704 if grouping_sets: 3705 elements["grouping_sets"].extend(grouping_sets) 3706 3707 rollup = None 3708 cube = None 3709 totals = None 3710 3711 index = self._index 3712 with_ = self._match(TokenType.WITH) 3713 if self._match(TokenType.ROLLUP): 3714 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3715 elements["rollup"].extend(ensure_list(rollup)) 3716 3717 if self._match(TokenType.CUBE): 3718 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3719 elements["cube"].extend(ensure_list(cube)) 3720 3721 if self._match_text_seq("TOTALS"): 3722 totals = True 3723 elements["totals"] = True # type: ignore 3724 3725 if not (grouping_sets or rollup or cube or totals): 3726 if with_: 3727 self._retreat(index) 3728 break 3729 3730 return self.expression(exp.Group, **elements) # type: ignore 3731 3732 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3733 if not self._match(TokenType.GROUPING_SETS): 3734 return None 3735 3736 return self._parse_wrapped_csv(self._parse_grouping_set) 3737 3738 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3739 if self._match(TokenType.L_PAREN): 3740 grouping_set = self._parse_csv(self._parse_column) 3741 self._match_r_paren() 3742 return self.expression(exp.Tuple, expressions=grouping_set) 3743 3744 return self._parse_column() 3745 3746 def 
_parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3747 if not skip_having_token and not self._match(TokenType.HAVING): 3748 return None 3749 return self.expression(exp.Having, this=self._parse_assignment()) 3750 3751 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3752 if not self._match(TokenType.QUALIFY): 3753 return None 3754 return self.expression(exp.Qualify, this=self._parse_assignment()) 3755 3756 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3757 if skip_start_token: 3758 start = None 3759 elif self._match(TokenType.START_WITH): 3760 start = self._parse_assignment() 3761 else: 3762 return None 3763 3764 self._match(TokenType.CONNECT_BY) 3765 nocycle = self._match_text_seq("NOCYCLE") 3766 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3767 exp.Prior, this=self._parse_bitwise() 3768 ) 3769 connect = self._parse_assignment() 3770 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3771 3772 if not start and self._match(TokenType.START_WITH): 3773 start = self._parse_assignment() 3774 3775 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3776 3777 def _parse_name_as_expression(self) -> exp.Alias: 3778 return self.expression( 3779 exp.Alias, 3780 alias=self._parse_id_var(any_token=True), 3781 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3782 ) 3783 3784 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3785 if self._match_text_seq("INTERPOLATE"): 3786 return self._parse_wrapped_csv(self._parse_name_as_expression) 3787 return None 3788 3789 def _parse_order( 3790 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3791 ) -> t.Optional[exp.Expression]: 3792 siblings = None 3793 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3794 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3795 return this 3796 3797 siblings = True 3798 3799 return self.expression( 3800 exp.Order, 3801 this=this, 3802 expressions=self._parse_csv(self._parse_ordered), 3803 interpolate=self._parse_interpolate(), 3804 siblings=siblings, 3805 ) 3806 3807 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3808 if not self._match(token): 3809 return None 3810 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3811 3812 def _parse_ordered( 3813 self, parse_method: t.Optional[t.Callable] = None 3814 ) -> t.Optional[exp.Ordered]: 3815 this = parse_method() if parse_method else self._parse_assignment() 3816 if not this: 3817 return None 3818 3819 asc = self._match(TokenType.ASC) 3820 desc = self._match(TokenType.DESC) or (asc and False) 3821 3822 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3823 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3824 3825 nulls_first = is_nulls_first or False 3826 explicitly_null_ordered = is_nulls_first or is_nulls_last 3827 3828 if ( 3829 not explicitly_null_ordered 3830 and ( 3831 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3832 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3833 ) 3834 and self.dialect.NULL_ORDERING != "nulls_are_last" 3835 ): 3836 nulls_first = True 3837 3838 if self._match_text_seq("WITH", "FILL"): 3839 with_fill = self.expression( 3840 exp.WithFill, 3841 **{ # type: ignore 3842 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3843 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3844 "step": self._match_text_seq("STEP") and 
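# STEP bound of ClickHouse's ORDER BY ... WITH FILL [FROM expr] [TO expr] [STEP expr]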
self._parse_bitwise(), 3845 }, 3846 ) 3847 else: 3848 with_fill = None 3849 3850 return self.expression( 3851 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3852 ) 3853 3854 def _parse_limit( 3855 self, 3856 this: t.Optional[exp.Expression] = None, 3857 top: bool = False, 3858 skip_limit_token: bool = False, 3859 ) -> t.Optional[exp.Expression]: 3860 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3861 comments = self._prev_comments 3862 if top: 3863 limit_paren = self._match(TokenType.L_PAREN) 3864 expression = self._parse_term() if limit_paren else self._parse_number() 3865 3866 if limit_paren: 3867 self._match_r_paren() 3868 else: 3869 expression = self._parse_term() 3870 3871 if self._match(TokenType.COMMA): 3872 offset = expression 3873 expression = self._parse_term() 3874 else: 3875 offset = None 3876 3877 limit_exp = self.expression( 3878 exp.Limit, 3879 this=this, 3880 expression=expression, 3881 offset=offset, 3882 comments=comments, 3883 expressions=self._parse_limit_by(), 3884 ) 3885 3886 return limit_exp 3887 3888 if self._match(TokenType.FETCH): 3889 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3890 direction = self._prev.text.upper() if direction else "FIRST" 3891 3892 count = self._parse_field(tokens=self.FETCH_TOKENS) 3893 percent = self._match(TokenType.PERCENT) 3894 3895 self._match_set((TokenType.ROW, TokenType.ROWS)) 3896 3897 only = self._match_text_seq("ONLY") 3898 with_ties = self._match_text_seq("WITH", "TIES") 3899 3900 if only and with_ties: 3901 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3902 3903 return self.expression( 3904 exp.Fetch, 3905 direction=direction, 3906 count=count, 3907 percent=percent, 3908 with_ties=with_ties, 3909 ) 3910 3911 return this 3912 3913 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3914 if not self._match(TokenType.OFFSET): 3915 return this 3916 3917 count = self._parse_term() 3918 self._match_set((TokenType.ROW, TokenType.ROWS)) 3919 3920 return self.expression( 3921 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3922 ) 3923 3924 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3925 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3926 3927 def _parse_locks(self) -> t.List[exp.Lock]: 3928 locks = [] 3929 while True: 3930 if self._match_text_seq("FOR", "UPDATE"): 3931 update = True 3932 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3933 "LOCK", "IN", "SHARE", "MODE" 3934 ): 3935 update = False 3936 else: 3937 break 3938 3939 expressions = None 3940 if self._match_text_seq("OF"): 3941 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3942 3943 wait: t.Optional[bool | exp.Expression] = None 3944 if self._match_text_seq("NOWAIT"): 3945 wait = True 3946 elif self._match_text_seq("WAIT"): 3947 wait = self._parse_primary() 3948 elif self._match_text_seq("SKIP", "LOCKED"): 3949 wait = False 3950 3951 locks.append( 3952 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3953 ) 3954 3955 return locks 3956 3957 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3958 while this and self._match_set(self.SET_OPERATIONS): 3959 token_type = self._prev.token_type 3960 3961 if token_type == TokenType.UNION: 3962 operation = exp.Union 3963 elif token_type == TokenType.EXCEPT: 3964 operation = exp.Except 3965 
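# SET_OPERATIONS only contains UNION, EXCEPT and INTERSECT, so INTERSECT is the
# remaining case.
#
# Resulting AST shape, as a sketch:
#
#     import sqlglot
#     from sqlglot import exp
#
#     union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
#     assert isinstance(union, exp.Union)
#     assert union.args["distinct"] is False  # ALL => distinct=False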
else: 3966 operation = exp.Intersect 3967 3968 comments = self._prev.comments 3969 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3970 by_name = self._match_text_seq("BY", "NAME") 3971 expression = self._parse_select(nested=True, parse_set_operation=False) 3972 3973 this = self.expression( 3974 operation, 3975 comments=comments, 3976 this=this, 3977 distinct=distinct, 3978 by_name=by_name, 3979 expression=expression, 3980 ) 3981 3982 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3983 expression = this.expression 3984 3985 if expression: 3986 for arg in self.UNION_MODIFIERS: 3987 expr = expression.args.get(arg) 3988 if expr: 3989 this.set(arg, expr.pop()) 3990 3991 return this 3992 3993 def _parse_expression(self) -> t.Optional[exp.Expression]: 3994 return self._parse_alias(self._parse_assignment()) 3995 3996 def _parse_assignment(self) -> t.Optional[exp.Expression]: 3997 this = self._parse_disjunction() 3998 3999 while self._match_set(self.ASSIGNMENT): 4000 this = self.expression( 4001 self.ASSIGNMENT[self._prev.token_type], 4002 this=this, 4003 comments=self._prev_comments, 4004 expression=self._parse_assignment(), 4005 ) 4006 4007 return this 4008 4009 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4010 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4011 4012 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4013 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4014 4015 def _parse_equality(self) -> t.Optional[exp.Expression]: 4016 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4017 4018 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4019 return self._parse_tokens(self._parse_range, self.COMPARISON) 4020 4021 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4022 this = this or self._parse_bitwise() 4023 negate = self._match(TokenType.NOT) 4024 4025 if self._match_set(self.RANGE_PARSERS): 4026 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4027 if not expression: 4028 return this 4029 4030 this = expression 4031 elif self._match(TokenType.ISNULL): 4032 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4033 4034 # Postgres supports ISNULL and NOTNULL for conditions. 
4035 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4036 if self._match(TokenType.NOTNULL): 4037 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4038 this = self.expression(exp.Not, this=this) 4039 4040 if negate: 4041 this = self.expression(exp.Not, this=this) 4042 4043 if self._match(TokenType.IS): 4044 this = self._parse_is(this) 4045 4046 return this 4047 4048 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4049 index = self._index - 1 4050 negate = self._match(TokenType.NOT) 4051 4052 if self._match_text_seq("DISTINCT", "FROM"): 4053 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4054 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4055 4056 expression = self._parse_null() or self._parse_boolean() 4057 if not expression: 4058 self._retreat(index) 4059 return None 4060 4061 this = self.expression(exp.Is, this=this, expression=expression) 4062 return self.expression(exp.Not, this=this) if negate else this 4063 4064 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4065 unnest = self._parse_unnest(with_alias=False) 4066 if unnest: 4067 this = self.expression(exp.In, this=this, unnest=unnest) 4068 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4069 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4070 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4071 4072 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4073 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4074 else: 4075 this = self.expression(exp.In, this=this, expressions=expressions) 4076 4077 if matched_l_paren: 4078 self._match_r_paren(this) 4079 elif not self._match(TokenType.R_BRACKET, expression=this): 4080 self.raise_error("Expecting ]") 4081 else: 4082 this = self.expression(exp.In, this=this, field=self._parse_field()) 4083 4084 return this 4085 4086 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4087 low = self._parse_bitwise() 4088 self._match(TokenType.AND) 4089 high = self._parse_bitwise() 4090 return self.expression(exp.Between, this=this, low=low, high=high) 4091 4092 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4093 if not self._match(TokenType.ESCAPE): 4094 return this 4095 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4096 4097 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4098 index = self._index 4099 4100 if not self._match(TokenType.INTERVAL) and match_interval: 4101 return None 4102 4103 if self._match(TokenType.STRING, advance=False): 4104 this = self._parse_primary() 4105 else: 4106 this = self._parse_term() 4107 4108 if not this or ( 4109 isinstance(this, exp.Column) 4110 and not this.table 4111 and not this.this.quoted 4112 and this.name.upper() == "IS" 4113 ): 4114 self._retreat(index) 4115 return None 4116 4117 unit = self._parse_function() or ( 4118 not self._match(TokenType.ALIAS, advance=False) 4119 and self._parse_var(any_token=True, upper=True) 4120 ) 4121 4122 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4123 # each INTERVAL expression into this canonical form so it's easy to transpile 4124 if this and this.is_number: 4125 this = exp.Literal.string(this.name) 4126 elif this and this.is_string: 4127 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4128 if 
len(parts) == 1: 4129 if unit: 4130 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4131 self._retreat(self._index - 1) 4132 4133 this = exp.Literal.string(parts[0][0]) 4134 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4135 4136 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4137 unit = self.expression( 4138 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4139 ) 4140 4141 interval = self.expression(exp.Interval, this=this, unit=unit) 4142 4143 index = self._index 4144 self._match(TokenType.PLUS) 4145 4146 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4147 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4148 return self.expression( 4149 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4150 ) 4151 4152 self._retreat(index) 4153 return interval 4154 4155 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4156 this = self._parse_term() 4157 4158 while True: 4159 if self._match_set(self.BITWISE): 4160 this = self.expression( 4161 self.BITWISE[self._prev.token_type], 4162 this=this, 4163 expression=self._parse_term(), 4164 ) 4165 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4166 this = self.expression( 4167 exp.DPipe, 4168 this=this, 4169 expression=self._parse_term(), 4170 safe=not self.dialect.STRICT_STRING_CONCAT, 4171 ) 4172 elif self._match(TokenType.DQMARK): 4173 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4174 elif self._match_pair(TokenType.LT, TokenType.LT): 4175 this = self.expression( 4176 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4177 ) 4178 elif self._match_pair(TokenType.GT, TokenType.GT): 4179 this = self.expression( 4180 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4181 ) 4182 else: 4183 break 4184 4185 return this 4186 4187 def _parse_term(self) -> t.Optional[exp.Expression]: 4188 return self._parse_tokens(self._parse_factor, self.TERM) 4189 4190 def _parse_factor(self) -> t.Optional[exp.Expression]: 4191 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4192 this = parse_method() 4193 4194 while self._match_set(self.FACTOR): 4195 klass = self.FACTOR[self._prev.token_type] 4196 comments = self._prev_comments 4197 expression = parse_method() 4198 4199 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4200 self._retreat(self._index - 1) 4201 return this 4202 4203 this = self.expression(klass, this=this, comments=comments, expression=expression) 4204 4205 if isinstance(this, exp.Div): 4206 this.args["typed"] = self.dialect.TYPED_DIVISION 4207 this.args["safe"] = self.dialect.SAFE_DIVISION 4208 4209 return this 4210 4211 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4212 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4213 4214 def _parse_unary(self) -> t.Optional[exp.Expression]: 4215 if self._match_set(self.UNARY_PARSERS): 4216 return self.UNARY_PARSERS[self._prev.token_type](self) 4217 return self._parse_at_time_zone(self._parse_type()) 4218 4219 def _parse_type( 4220 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4221 ) -> t.Optional[exp.Expression]: 4222 interval = parse_interval and self._parse_interval() 4223 if interval: 4224 return interval 4225 4226 index = self._index 4227 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4228 4229 if data_type: 4230 index2 = 
self._index 4231 this = self._parse_primary() 4232 4233 if isinstance(this, exp.Literal): 4234 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4235 if parser: 4236 return parser(self, this, data_type) 4237 4238 return self.expression(exp.Cast, this=this, to=data_type) 4239 4240 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4241 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4242 # 4243 # If the index difference here is greater than 1, that means the parser itself must have 4244 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4245 # 4246 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4247 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4248 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4249 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4250 # 4251 # In these cases, we don't really want to return the converted type, but instead retreat 4252 # and try to parse a Column or Identifier in the section below. 4253 if data_type.expressions and index2 - index > 1: 4254 self._retreat(index2) 4255 return self._parse_column_ops(data_type) 4256 4257 self._retreat(index) 4258 4259 if fallback_to_identifier: 4260 return self._parse_id_var() 4261 4262 this = self._parse_column() 4263 return this and self._parse_column_ops(this) 4264 4265 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4266 this = self._parse_type() 4267 if not this: 4268 return None 4269 4270 if isinstance(this, exp.Column) and not this.table: 4271 this = exp.var(this.name.upper()) 4272 4273 return self.expression( 4274 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4275 ) 4276 4277 def _parse_types( 4278 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4279 ) -> t.Optional[exp.Expression]: 4280 index = self._index 4281 4282 this: t.Optional[exp.Expression] = None 4283 prefix = self._match_text_seq("SYSUDTLIB", ".") 4284 4285 if not self._match_set(self.TYPE_TOKENS): 4286 identifier = allow_identifiers and self._parse_id_var( 4287 any_token=False, tokens=(TokenType.VAR,) 4288 ) 4289 if identifier: 4290 tokens = self.dialect.tokenize(identifier.name) 4291 4292 if len(tokens) != 1: 4293 self.raise_error("Unexpected identifier", self._prev) 4294 4295 if tokens[0].token_type in self.TYPE_TOKENS: 4296 self._prev = tokens[0] 4297 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4298 type_name = identifier.name 4299 4300 while self._match(TokenType.DOT): 4301 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4302 4303 this = exp.DataType.build(type_name, udt=True) 4304 else: 4305 self._retreat(self._index - 1) 4306 return None 4307 else: 4308 return None 4309 4310 type_token = self._prev.token_type 4311 4312 if type_token == TokenType.PSEUDO_TYPE: 4313 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4314 4315 if type_token == TokenType.OBJECT_IDENTIFIER: 4316 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4317 4318 # https://materialize.com/docs/sql/types/map/ 4319 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4320 key_type = self._parse_types( 4321 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4322 ) 4323 if not self._match(TokenType.FARROW): 4324 self._retreat(index) 4325 return None 4326
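# Materialize spells its map type map[<key_type> => <value_type>]; FARROW is the "=>"
# token, so a missing arrow means this wasn't a map type after all. A successful parse
# yields, as a sketch: exp.DataType(this=Type.MAP, expressions=[key_type, value_type]).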
4327 value_type = self._parse_types( 4328 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4329 ) 4330 if not self._match(TokenType.R_BRACKET): 4331 self._retreat(index) 4332 return None 4333 4334 return exp.DataType( 4335 this=exp.DataType.Type.MAP, 4336 expressions=[key_type, value_type], 4337 nested=True, 4338 prefix=prefix, 4339 ) 4340 4341 nested = type_token in self.NESTED_TYPE_TOKENS 4342 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4343 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4344 expressions = None 4345 maybe_func = False 4346 4347 if self._match(TokenType.L_PAREN): 4348 if is_struct: 4349 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4350 elif nested: 4351 expressions = self._parse_csv( 4352 lambda: self._parse_types( 4353 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4354 ) 4355 ) 4356 elif type_token in self.ENUM_TYPE_TOKENS: 4357 expressions = self._parse_csv(self._parse_equality) 4358 elif is_aggregate: 4359 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4360 any_token=False, tokens=(TokenType.VAR,) 4361 ) 4362 if not func_or_ident or not self._match(TokenType.COMMA): 4363 return None 4364 expressions = self._parse_csv( 4365 lambda: self._parse_types( 4366 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4367 ) 4368 ) 4369 expressions.insert(0, func_or_ident) 4370 else: 4371 expressions = self._parse_csv(self._parse_type_size) 4372 4373 if not expressions or not self._match(TokenType.R_PAREN): 4374 self._retreat(index) 4375 return None 4376 4377 maybe_func = True 4378 4379 values: t.Optional[t.List[exp.Expression]] = None 4380 4381 if nested and self._match(TokenType.LT): 4382 if is_struct: 4383 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4384 else: 4385 expressions = self._parse_csv( 4386 lambda: self._parse_types( 4387 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4388 ) 4389 ) 4390 4391 if not self._match(TokenType.GT): 4392 self.raise_error("Expecting >") 4393 4394 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4395 values = self._parse_csv(self._parse_assignment) 4396 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4397 4398 if type_token in self.TIMESTAMPS: 4399 if self._match_text_seq("WITH", "TIME", "ZONE"): 4400 maybe_func = False 4401 tz_type = ( 4402 exp.DataType.Type.TIMETZ 4403 if type_token in self.TIMES 4404 else exp.DataType.Type.TIMESTAMPTZ 4405 ) 4406 this = exp.DataType(this=tz_type, expressions=expressions) 4407 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4408 maybe_func = False 4409 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4410 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4411 maybe_func = False 4412 elif type_token == TokenType.INTERVAL: 4413 unit = self._parse_var(upper=True) 4414 if unit: 4415 if self._match_text_seq("TO"): 4416 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4417 4418 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4419 else: 4420 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4421 4422 if maybe_func and check_func: 4423 index2 = self._index 4424 peek = self._parse_string() 4425 4426 if not peek: 4427 self._retreat(index) 4428 return None 4429 4430 self._retreat(index2) 4431 4432 if not this: 4433 if 
self._match_text_seq("UNSIGNED"): 4434 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4435 if not unsigned_type_token: 4436 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4437 4438 type_token = unsigned_type_token or type_token 4439 4440 this = exp.DataType( 4441 this=exp.DataType.Type[type_token.value], 4442 expressions=expressions, 4443 nested=nested, 4444 values=values, 4445 prefix=prefix, 4446 ) 4447 elif expressions: 4448 this.set("expressions", expressions) 4449 4450 # https://materialize.com/docs/sql/types/list/#type-name 4451 while self._match(TokenType.LIST): 4452 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4453 4454 index = self._index 4455 4456 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4457 matched_array = self._match(TokenType.ARRAY) 4458 4459 while self._curr: 4460 matched_l_bracket = self._match(TokenType.L_BRACKET) 4461 if not matched_l_bracket and not matched_array: 4462 break 4463 4464 matched_array = False 4465 values = self._parse_csv(self._parse_assignment) or None 4466 if values and not schema: 4467 self._retreat(index) 4468 break 4469 4470 this = exp.DataType( 4471 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4472 ) 4473 self._match(TokenType.R_BRACKET) 4474 4475 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4476 converter = self.TYPE_CONVERTERS.get(this.this) 4477 if converter: 4478 this = converter(t.cast(exp.DataType, this)) 4479 4480 return this 4481 4482 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4483 index = self._index 4484 this = ( 4485 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4486 or self._parse_id_var() 4487 ) 4488 self._match(TokenType.COLON) 4489 4490 if ( 4491 type_required 4492 and not isinstance(this, exp.DataType) 4493 and not self._match_set(self.TYPE_TOKENS, advance=False) 4494 ): 4495 self._retreat(index) 4496 return self._parse_types() 4497 4498 return self._parse_column_def(this) 4499 4500 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4501 if not self._match_text_seq("AT", "TIME", "ZONE"): 4502 return this 4503 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4504 4505 def _parse_column(self) -> t.Optional[exp.Expression]: 4506 this = self._parse_column_reference() 4507 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4508 4509 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4510 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4511 4512 return column 4513 4514 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4515 this = self._parse_field() 4516 if ( 4517 not this 4518 and self._match(TokenType.VALUES, advance=False) 4519 and self.VALUES_FOLLOWED_BY_PAREN 4520 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4521 ): 4522 this = self._parse_id_var() 4523 4524 if isinstance(this, exp.Identifier): 4525 # We bubble up comments from the Identifier to the Column 4526 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4527 4528 return this 4529 4530 def _parse_colon_as_json_extract( 4531 self, this: t.Optional[exp.Expression] 4532 ) -> t.Optional[exp.Expression]: 4533 casts = [] 4534 json_path = [] 4535 4536 while self._match(TokenType.COLON): 4537 start_index = self._index 4538 4539 # Snowflake allows reserved keywords as json keys but 
advance_any() excludes TokenType.SELECT from any_tokens=True 4540 path = self._parse_column_ops( 4541 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4542 ) 4543 4544 # The cast :: operator has a lower precedence than the extraction operator :, so 4545 # we rearrange the AST appropriately to avoid casting the JSON path 4546 while isinstance(path, exp.Cast): 4547 casts.append(path.to) 4548 path = path.this 4549 4550 if casts: 4551 dcolon_offset = next( 4552 i 4553 for i, t in enumerate(self._tokens[start_index:]) 4554 if t.token_type == TokenType.DCOLON 4555 ) 4556 end_token = self._tokens[start_index + dcolon_offset - 1] 4557 else: 4558 end_token = self._prev 4559 4560 if path: 4561 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4562 4563 if json_path: 4564 this = self.expression( 4565 exp.JSONExtract, 4566 this=this, 4567 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4568 ) 4569 4570 while casts: 4571 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4572 4573 return this 4574 4575 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4576 this = self._parse_bracket(this) 4577 4578 while self._match_set(self.COLUMN_OPERATORS): 4579 op_token = self._prev.token_type 4580 op = self.COLUMN_OPERATORS.get(op_token) 4581 4582 if op_token == TokenType.DCOLON: 4583 field = self._parse_types() 4584 if not field: 4585 self.raise_error("Expected type") 4586 elif op and self._curr: 4587 field = self._parse_column_reference() 4588 else: 4589 field = self._parse_field(any_token=True, anonymous_func=True) 4590 4591 if isinstance(field, exp.Func) and this: 4592 # bigquery allows function calls like x.y.count(...) 4593 # SAFE.SUBSTR(...) 4594 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4595 this = exp.replace_tree( 4596 this, 4597 lambda n: ( 4598 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4599 if n.table 4600 else n.this 4601 ) 4602 if isinstance(n, exp.Column) 4603 else n, 4604 ) 4605 4606 if op: 4607 this = op(self, this, field) 4608 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4609 this = self.expression( 4610 exp.Column, 4611 this=field, 4612 table=this.this, 4613 db=this.args.get("table"), 4614 catalog=this.args.get("db"), 4615 ) 4616 else: 4617 this = self.expression(exp.Dot, this=this, expression=field) 4618 4619 this = self._parse_bracket(this) 4620 4621 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4622 4623 def _parse_primary(self) -> t.Optional[exp.Expression]: 4624 if self._match_set(self.PRIMARY_PARSERS): 4625 token_type = self._prev.token_type 4626 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4627 4628 if token_type == TokenType.STRING: 4629 expressions = [primary] 4630 while self._match(TokenType.STRING): 4631 expressions.append(exp.Literal.string(self._prev.text)) 4632 4633 if len(expressions) > 1: 4634 return self.expression(exp.Concat, expressions=expressions) 4635 4636 return primary 4637 4638 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4639 return exp.Literal.number(f"0.{self._prev.text}") 4640 4641 if self._match(TokenType.L_PAREN): 4642 comments = self._prev_comments 4643 query = self._parse_select() 4644 4645 if query: 4646 expressions = [query] 4647 else: 4648 expressions = self._parse_expressions() 4649 4650 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4651 4652 if not this 
and self._match(TokenType.R_PAREN, advance=False): 4653 this = self.expression(exp.Tuple) 4654 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4655 this = self._parse_subquery(this=this, parse_alias=False) 4656 elif isinstance(this, exp.Subquery): 4657 this = self._parse_subquery( 4658 this=self._parse_set_operations(this), parse_alias=False 4659 ) 4660 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4661 this = self.expression(exp.Tuple, expressions=expressions) 4662 else: 4663 this = self.expression(exp.Paren, this=this) 4664 4665 if this: 4666 this.add_comments(comments) 4667 4668 self._match_r_paren(expression=this) 4669 return this 4670 4671 return None 4672 4673 def _parse_field( 4674 self, 4675 any_token: bool = False, 4676 tokens: t.Optional[t.Collection[TokenType]] = None, 4677 anonymous_func: bool = False, 4678 ) -> t.Optional[exp.Expression]: 4679 if anonymous_func: 4680 field = ( 4681 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4682 or self._parse_primary() 4683 ) 4684 else: 4685 field = self._parse_primary() or self._parse_function( 4686 anonymous=anonymous_func, any_token=any_token 4687 ) 4688 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4689 4690 def _parse_function( 4691 self, 4692 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4693 anonymous: bool = False, 4694 optional_parens: bool = True, 4695 any_token: bool = False, 4696 ) -> t.Optional[exp.Expression]: 4697 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4698 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4699 fn_syntax = False 4700 if ( 4701 self._match(TokenType.L_BRACE, advance=False) 4702 and self._next 4703 and self._next.text.upper() == "FN" 4704 ): 4705 self._advance(2) 4706 fn_syntax = True 4707 4708 func = self._parse_function_call( 4709 functions=functions, 4710 anonymous=anonymous, 4711 optional_parens=optional_parens, 4712 any_token=any_token, 4713 ) 4714 4715 if fn_syntax: 4716 self._match(TokenType.R_BRACE) 4717 4718 return func 4719 4720 def _parse_function_call( 4721 self, 4722 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4723 anonymous: bool = False, 4724 optional_parens: bool = True, 4725 any_token: bool = False, 4726 ) -> t.Optional[exp.Expression]: 4727 if not self._curr: 4728 return None 4729 4730 comments = self._curr.comments 4731 token_type = self._curr.token_type 4732 this = self._curr.text 4733 upper = this.upper() 4734 4735 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4736 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4737 self._advance() 4738 return self._parse_window(parser(self)) 4739 4740 if not self._next or self._next.token_type != TokenType.L_PAREN: 4741 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4742 self._advance() 4743 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4744 4745 return None 4746 4747 if any_token: 4748 if token_type in self.RESERVED_TOKENS: 4749 return None 4750 elif token_type not in self.FUNC_TOKENS: 4751 return None 4752 4753 self._advance(2) 4754 4755 parser = self.FUNCTION_PARSERS.get(upper) 4756 if parser and not anonymous: 4757 this = parser(self) 4758 else: 4759 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4760 4761 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4762 this = self.expression(subquery_predicate, this=self._parse_select()) 4763 self._match_r_paren() 4764 return this 4765 
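# No dedicated FUNCTION_PARSERS or SUBQUERY_PREDICATES entry matched, so resolve the
# name through the FUNCTIONS registry; unregistered names become exp.Anonymous.
#
# Observable behavior, as a sketch:
#
#     import sqlglot
#     from sqlglot import exp
#
#     assert sqlglot.parse_one("SELECT ABS(x)").find(exp.Abs) is not None
#     # MY_UDF is a hypothetical, unregistered function name
#     assert sqlglot.parse_one("SELECT MY_UDF(x)").find(exp.Anonymous) is not None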
4766 if functions is None: 4767 functions = self.FUNCTIONS 4768 4769 function = functions.get(upper) 4770 4771 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4772 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4773 4774 if alias: 4775 args = self._kv_to_prop_eq(args) 4776 4777 if function and not anonymous: 4778 if "dialect" in function.__code__.co_varnames: 4779 func = function(args, dialect=self.dialect) 4780 else: 4781 func = function(args) 4782 4783 func = self.validate_expression(func, args) 4784 if not self.dialect.NORMALIZE_FUNCTIONS: 4785 func.meta["name"] = this 4786 4787 this = func 4788 else: 4789 if token_type == TokenType.IDENTIFIER: 4790 this = exp.Identifier(this=this, quoted=True) 4791 this = self.expression(exp.Anonymous, this=this, expressions=args) 4792 4793 if isinstance(this, exp.Expression): 4794 this.add_comments(comments) 4795 4796 self._match_r_paren(this) 4797 return self._parse_window(this) 4798 4799 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4800 transformed = [] 4801 4802 for e in expressions: 4803 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4804 if isinstance(e, exp.Alias): 4805 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4806 4807 if not isinstance(e, exp.PropertyEQ): 4808 e = self.expression( 4809 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4810 ) 4811 4812 if isinstance(e.this, exp.Column): 4813 e.this.replace(e.this.this) 4814 4815 transformed.append(e) 4816 4817 return transformed 4818 4819 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4820 return self._parse_column_def(self._parse_id_var()) 4821 4822 def _parse_user_defined_function( 4823 self, kind: t.Optional[TokenType] = None 4824 ) -> t.Optional[exp.Expression]: 4825 this = self._parse_id_var() 4826 4827 while self._match(TokenType.DOT): 4828 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4829 4830 if not self._match(TokenType.L_PAREN): 4831 return this 4832 4833 expressions = self._parse_csv(self._parse_function_parameter) 4834 self._match_r_paren() 4835 return self.expression( 4836 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4837 ) 4838 4839 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4840 literal = self._parse_primary() 4841 if literal: 4842 return self.expression(exp.Introducer, this=token.text, expression=literal) 4843 4844 return self.expression(exp.Identifier, this=token.text) 4845 4846 def _parse_session_parameter(self) -> exp.SessionParameter: 4847 kind = None 4848 this = self._parse_id_var() or self._parse_primary() 4849 4850 if this and self._match(TokenType.DOT): 4851 kind = this.name 4852 this = self._parse_var() or self._parse_primary() 4853 4854 return self.expression(exp.SessionParameter, this=this, kind=kind) 4855 4856 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4857 return self._parse_id_var() 4858 4859 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4860 index = self._index 4861 4862 if self._match(TokenType.L_PAREN): 4863 expressions = t.cast( 4864 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4865 ) 4866 4867 if not self._match(TokenType.R_PAREN): 4868 self._retreat(index) 4869 else: 4870 expressions = [self._parse_lambda_arg()] 4871 4872 if self._match_set(self.LAMBDAS): 4873 return self.LAMBDAS[self._prev.token_type](self, expressions) 4874 4875 
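# No lambda arrow followed the (possibly parenthesized) argument list, so this is not
# a lambda; rewind and parse a DISTINCT clause or a plain expression instead.
#
# For contrast, an arrow that does match LAMBDAS (sketch, default dialect):
#
#     import sqlglot
#     from sqlglot import exp
#
#     q = sqlglot.parse_one("SELECT TRANSFORM(xs, x -> x + 1)")
#     assert q.find(exp.Lambda) is not None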
self._retreat(index) 4876 4877 this: t.Optional[exp.Expression] 4878 4879 if self._match(TokenType.DISTINCT): 4880 this = self.expression( 4881 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4882 ) 4883 else: 4884 this = self._parse_select_or_expression(alias=alias) 4885 4886 return self._parse_limit( 4887 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4888 ) 4889 4890 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4891 index = self._index 4892 if not self._match(TokenType.L_PAREN): 4893 return this 4894 4895 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4896 # expr can be of both types 4897 if self._match_set(self.SELECT_START_TOKENS): 4898 self._retreat(index) 4899 return this 4900 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4901 self._match_r_paren() 4902 return self.expression(exp.Schema, this=this, expressions=args) 4903 4904 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4905 return self._parse_column_def(self._parse_field(any_token=True)) 4906 4907 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4908 # column defs are not really columns, they're identifiers 4909 if isinstance(this, exp.Column): 4910 this = this.this 4911 4912 kind = self._parse_types(schema=True) 4913 4914 if self._match_text_seq("FOR", "ORDINALITY"): 4915 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4916 4917 constraints: t.List[exp.Expression] = [] 4918 4919 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4920 ("ALIAS", "MATERIALIZED") 4921 ): 4922 persisted = self._prev.text.upper() == "MATERIALIZED" 4923 constraints.append( 4924 self.expression( 4925 exp.ComputedColumnConstraint, 4926 this=self._parse_assignment(), 4927 persisted=persisted or self._match_text_seq("PERSISTED"), 4928 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4929 ) 4930 ) 4931 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4932 self._match(TokenType.ALIAS) 4933 constraints.append( 4934 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4935 ) 4936 4937 while True: 4938 constraint = self._parse_column_constraint() 4939 if not constraint: 4940 break 4941 constraints.append(constraint) 4942 4943 if not kind and not constraints: 4944 return this 4945 4946 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4947 4948 def _parse_auto_increment( 4949 self, 4950 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4951 start = None 4952 increment = None 4953 4954 if self._match(TokenType.L_PAREN, advance=False): 4955 args = self._parse_wrapped_csv(self._parse_bitwise) 4956 start = seq_get(args, 0) 4957 increment = seq_get(args, 1) 4958 elif self._match_text_seq("START"): 4959 start = self._parse_bitwise() 4960 self._match_text_seq("INCREMENT") 4961 increment = self._parse_bitwise() 4962 4963 if start and increment: 4964 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4965 4966 return exp.AutoIncrementColumnConstraint() 4967 4968 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4969 if not self._match_text_seq("REFRESH"): 4970 self._retreat(self._index - 1) 4971 return None 4972 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4973 4974 def 
_parse_compress(self) -> exp.CompressColumnConstraint: 4975 if self._match(TokenType.L_PAREN, advance=False): 4976 return self.expression( 4977 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4978 ) 4979 4980 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4981 4982 def _parse_generated_as_identity( 4983 self, 4984 ) -> ( 4985 exp.GeneratedAsIdentityColumnConstraint 4986 | exp.ComputedColumnConstraint 4987 | exp.GeneratedAsRowColumnConstraint 4988 ): 4989 if self._match_text_seq("BY", "DEFAULT"): 4990 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4991 this = self.expression( 4992 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4993 ) 4994 else: 4995 self._match_text_seq("ALWAYS") 4996 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4997 4998 self._match(TokenType.ALIAS) 4999 5000 if self._match_text_seq("ROW"): 5001 start = self._match_text_seq("START") 5002 if not start: 5003 self._match(TokenType.END) 5004 hidden = self._match_text_seq("HIDDEN") 5005 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5006 5007 identity = self._match_text_seq("IDENTITY") 5008 5009 if self._match(TokenType.L_PAREN): 5010 if self._match(TokenType.START_WITH): 5011 this.set("start", self._parse_bitwise()) 5012 if self._match_text_seq("INCREMENT", "BY"): 5013 this.set("increment", self._parse_bitwise()) 5014 if self._match_text_seq("MINVALUE"): 5015 this.set("minvalue", self._parse_bitwise()) 5016 if self._match_text_seq("MAXVALUE"): 5017 this.set("maxvalue", self._parse_bitwise()) 5018 5019 if self._match_text_seq("CYCLE"): 5020 this.set("cycle", True) 5021 elif self._match_text_seq("NO", "CYCLE"): 5022 this.set("cycle", False) 5023 5024 if not identity: 5025 this.set("expression", self._parse_range()) 5026 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5027 args = self._parse_csv(self._parse_bitwise) 5028 this.set("start", seq_get(args, 0)) 5029 this.set("increment", seq_get(args, 1)) 5030 5031 self._match_r_paren() 5032 5033 return this 5034 5035 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5036 self._match_text_seq("LENGTH") 5037 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5038 5039 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5040 if self._match_text_seq("NULL"): 5041 return self.expression(exp.NotNullColumnConstraint) 5042 if self._match_text_seq("CASESPECIFIC"): 5043 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5044 if self._match_text_seq("FOR", "REPLICATION"): 5045 return self.expression(exp.NotForReplicationColumnConstraint) 5046 return None 5047 5048 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5049 if self._match(TokenType.CONSTRAINT): 5050 this = self._parse_id_var() 5051 else: 5052 this = None 5053 5054 if self._match_texts(self.CONSTRAINT_PARSERS): 5055 return self.expression( 5056 exp.ColumnConstraint, 5057 this=this, 5058 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5059 ) 5060 5061 return this 5062 5063 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5064 if not self._match(TokenType.CONSTRAINT): 5065 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5066 5067 return self.expression( 5068 exp.Constraint, 5069 this=self._parse_id_var(), 5070 expressions=self._parse_unnamed_constraints(), 5071 ) 5072 5073 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5074 constraints = [] 5075 while True: 5076 constraint = self._parse_unnamed_constraint() or self._parse_function() 5077 if not constraint: 5078 break 5079 constraints.append(constraint) 5080 5081 return constraints 5082 5083 def _parse_unnamed_constraint( 5084 self, constraints: t.Optional[t.Collection[str]] = None 5085 ) -> t.Optional[exp.Expression]: 5086 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5087 constraints or self.CONSTRAINT_PARSERS 5088 ): 5089 return None 5090 5091 constraint = self._prev.text.upper() 5092 if constraint not in self.CONSTRAINT_PARSERS: 5093 self.raise_error(f"No parser found for schema constraint {constraint}.") 5094 5095 return self.CONSTRAINT_PARSERS[constraint](self) 5096 5097 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5098 self._match_text_seq("KEY") 5099 return self.expression( 5100 exp.UniqueColumnConstraint, 5101 this=self._parse_schema(self._parse_id_var(any_token=False)), 5102 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5103 on_conflict=self._parse_on_conflict(), 5104 ) 5105 5106 def _parse_key_constraint_options(self) -> t.List[str]: 5107 options = [] 5108 while True: 5109 if not self._curr: 5110 break 5111 5112 if self._match(TokenType.ON): 5113 action = None 5114 on = self._advance_any() and self._prev.text 5115 5116 if self._match_text_seq("NO", "ACTION"): 5117 action = "NO ACTION" 5118 elif self._match_text_seq("CASCADE"): 5119 action = "CASCADE" 5120 elif self._match_text_seq("RESTRICT"): 5121 action = "RESTRICT" 5122 elif self._match_pair(TokenType.SET, TokenType.NULL): 5123 action = "SET NULL" 5124 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5125 action = "SET DEFAULT" 5126 else: 5127 self.raise_error("Invalid key constraint") 5128 5129 options.append(f"ON {on} {action}") 5130 elif self._match_text_seq("NOT", "ENFORCED"): 5131 options.append("NOT ENFORCED") 5132 elif self._match_text_seq("DEFERRABLE"): 5133 options.append("DEFERRABLE") 5134 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5135 options.append("INITIALLY DEFERRED") 5136 elif self._match_text_seq("NORELY"): 5137 options.append("NORELY") 5138 elif self._match_text_seq("MATCH", "FULL"): 5139 options.append("MATCH FULL") 5140 else: 5141 break 5142 5143 return options 5144 5145 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5146 if match and not self._match(TokenType.REFERENCES): 5147 return None 5148 5149 expressions = None 5150 this = self._parse_table(schema=True) 5151 options = self._parse_key_constraint_options() 5152 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5153 5154 def _parse_foreign_key(self) -> exp.ForeignKey: 5155 expressions = self._parse_wrapped_id_vars() 5156 reference = self._parse_references() 5157 options = {} 5158 5159 while self._match(TokenType.ON): 5160 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5161 self.raise_error("Expected DELETE or UPDATE") 5162 5163 kind = self._prev.text.lower() 5164 5165 if self._match_text_seq("NO", "ACTION"): 5166 action = "NO ACTION" 5167 elif self._match(TokenType.SET): 5168 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5169 action = "SET " + self._prev.text.upper() 5170 else: 5171 self._advance() 5172 action = self._prev.text.upper() 5173 5174 options[kind] = action 5175 5176 return self.expression( 5177 exp.ForeignKey, 5178 expressions=expressions, 5179 
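# `options` maps the lowercase event ("delete"/"update") to its referential action,
# e.g. {"delete": "CASCADE"}, so ON DELETE CASCADE surfaces as the node's `delete` arg.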
reference=reference, 5180 **options, # type: ignore 5181 ) 5182 5183 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5184 return self._parse_field() 5185 5186 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5187 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5188 self._retreat(self._index - 1) 5189 return None 5190 5191 id_vars = self._parse_wrapped_id_vars() 5192 return self.expression( 5193 exp.PeriodForSystemTimeConstraint, 5194 this=seq_get(id_vars, 0), 5195 expression=seq_get(id_vars, 1), 5196 ) 5197 5198 def _parse_primary_key( 5199 self, wrapped_optional: bool = False, in_props: bool = False 5200 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5201 desc = ( 5202 self._match_set((TokenType.ASC, TokenType.DESC)) 5203 and self._prev.token_type == TokenType.DESC 5204 ) 5205 5206 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5207 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5208 5209 expressions = self._parse_wrapped_csv( 5210 self._parse_primary_key_part, optional=wrapped_optional 5211 ) 5212 options = self._parse_key_constraint_options() 5213 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5214 5215 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5216 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5217 5218 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5219 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5220 return this 5221 5222 bracket_kind = self._prev.token_type 5223 expressions = self._parse_csv( 5224 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5225 ) 5226 5227 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5228 self.raise_error("Expected ]") 5229 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5230 self.raise_error("Expected }") 5231 5232 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5233 if bracket_kind == TokenType.L_BRACE: 5234 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5235 elif not this: 5236 this = self.expression(exp.Array, expressions=expressions) 5237 else: 5238 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5239 if constructor_type: 5240 return self.expression(constructor_type, expressions=expressions) 5241 5242 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5243 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5244 5245 self._add_comments(this) 5246 return self._parse_bracket(this) 5247 5248 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5249 if self._match(TokenType.COLON): 5250 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5251 return this 5252 5253 def _parse_case(self) -> t.Optional[exp.Expression]: 5254 ifs = [] 5255 default = None 5256 5257 comments = self._prev_comments 5258 expression = self._parse_assignment() 5259 5260 while self._match(TokenType.WHEN): 5261 this = self._parse_assignment() 5262 self._match(TokenType.THEN) 5263 then = self._parse_assignment() 5264 ifs.append(self.expression(exp.If, this=this, true=then)) 5265 5266 if self._match(TokenType.ELSE): 5267 default = self._parse_assignment() 5268 5269 if not self._match(TokenType.END): 5270 if 
isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5271 default = exp.column("interval") 5272 else: 5273 self.raise_error("Expected END after CASE", self._prev) 5274 5275 return self.expression( 5276 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5277 ) 5278 5279 def _parse_if(self) -> t.Optional[exp.Expression]: 5280 if self._match(TokenType.L_PAREN): 5281 args = self._parse_csv(self._parse_assignment) 5282 this = self.validate_expression(exp.If.from_arg_list(args), args) 5283 self._match_r_paren() 5284 else: 5285 index = self._index - 1 5286 5287 if self.NO_PAREN_IF_COMMANDS and index == 0: 5288 return self._parse_as_command(self._prev) 5289 5290 condition = self._parse_assignment() 5291 5292 if not condition: 5293 self._retreat(index) 5294 return None 5295 5296 self._match(TokenType.THEN) 5297 true = self._parse_assignment() 5298 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5299 self._match(TokenType.END) 5300 this = self.expression(exp.If, this=condition, true=true, false=false) 5301 5302 return this 5303 5304 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5305 if not self._match_text_seq("VALUE", "FOR"): 5306 self._retreat(self._index - 1) 5307 return None 5308 5309 return self.expression( 5310 exp.NextValueFor, 5311 this=self._parse_column(), 5312 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5313 ) 5314 5315 def _parse_extract(self) -> exp.Extract: 5316 this = self._parse_function() or self._parse_var() or self._parse_type() 5317 5318 if self._match(TokenType.FROM): 5319 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5320 5321 if not self._match(TokenType.COMMA): 5322 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5323 5324 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5325 5326 def _parse_gap_fill(self) -> exp.GapFill: 5327 self._match(TokenType.TABLE) 5328 this = self._parse_table() 5329 5330 self._match(TokenType.COMMA) 5331 args = [this, *self._parse_csv(self._parse_lambda)] 5332 5333 gap_fill = exp.GapFill.from_arg_list(args) 5334 return self.validate_expression(gap_fill, args) 5335 5336 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5337 this = self._parse_assignment() 5338 5339 if not self._match(TokenType.ALIAS): 5340 if self._match(TokenType.COMMA): 5341 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5342 5343 self.raise_error("Expected AS after CAST") 5344 5345 fmt = None 5346 to = self._parse_types() 5347 5348 if self._match(TokenType.FORMAT): 5349 fmt_string = self._parse_string() 5350 fmt = self._parse_at_time_zone(fmt_string) 5351 5352 if not to: 5353 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5354 if to.this in exp.DataType.TEMPORAL_TYPES: 5355 this = self.expression( 5356 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5357 this=this, 5358 format=exp.Literal.string( 5359 format_time( 5360 fmt_string.this if fmt_string else "", 5361 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5362 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5363 ) 5364 ), 5365 ) 5366 5367 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5368 this.set("zone", fmt.args["zone"]) 5369 return this 5370 elif not to: 5371 self.raise_error("Expected TYPE after CAST") 5372 elif isinstance(to, exp.Identifier): 5373 to = exp.DataType.build(to.name, 
udt=True) 5374 elif to.this == exp.DataType.Type.CHAR: 5375 if self._match(TokenType.CHARACTER_SET): 5376 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5377 5378 return self.expression( 5379 exp.Cast if strict else exp.TryCast, 5380 this=this, 5381 to=to, 5382 format=fmt, 5383 safe=safe, 5384 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5385 ) 5386 5387 def _parse_string_agg(self) -> exp.Expression: 5388 if self._match(TokenType.DISTINCT): 5389 args: t.List[t.Optional[exp.Expression]] = [ 5390 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5391 ] 5392 if self._match(TokenType.COMMA): 5393 args.extend(self._parse_csv(self._parse_assignment)) 5394 else: 5395 args = self._parse_csv(self._parse_assignment) # type: ignore 5396 5397 index = self._index 5398 if not self._match(TokenType.R_PAREN) and args: 5399 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5400 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5401 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5402 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5403 5404 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5405 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5406 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5407 if not self._match_text_seq("WITHIN", "GROUP"): 5408 self._retreat(index) 5409 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5410 5411 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5412 order = self._parse_order(this=seq_get(args, 0)) 5413 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5414 5415 def _parse_convert( 5416 self, strict: bool, safe: t.Optional[bool] = None 5417 ) -> t.Optional[exp.Expression]: 5418 this = self._parse_bitwise() 5419 5420 if self._match(TokenType.USING): 5421 to: t.Optional[exp.Expression] = self.expression( 5422 exp.CharacterSet, this=self._parse_var() 5423 ) 5424 elif self._match(TokenType.COMMA): 5425 to = self._parse_types() 5426 else: 5427 to = None 5428 5429 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5430 5431 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5432 """ 5433 There are generally two variants of the DECODE function: 5434 5435 - DECODE(bin, charset) 5436 - DECODE(expression, search, result [, search, result] ... [, default]) 5437 5438 The second variant will always be parsed into a CASE expression. Note that NULL 5439 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5440 instead of relying on pattern matching. 
5441 """ 5442 args = self._parse_csv(self._parse_assignment) 5443 5444 if len(args) < 3: 5445 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5446 5447 expression, *expressions = args 5448 if not expression: 5449 return None 5450 5451 ifs = [] 5452 for search, result in zip(expressions[::2], expressions[1::2]): 5453 if not search or not result: 5454 return None 5455 5456 if isinstance(search, exp.Literal): 5457 ifs.append( 5458 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5459 ) 5460 elif isinstance(search, exp.Null): 5461 ifs.append( 5462 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5463 ) 5464 else: 5465 cond = exp.or_( 5466 exp.EQ(this=expression.copy(), expression=search), 5467 exp.and_( 5468 exp.Is(this=expression.copy(), expression=exp.Null()), 5469 exp.Is(this=search.copy(), expression=exp.Null()), 5470 copy=False, 5471 ), 5472 copy=False, 5473 ) 5474 ifs.append(exp.If(this=cond, true=result)) 5475 5476 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5477 5478 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5479 self._match_text_seq("KEY") 5480 key = self._parse_column() 5481 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5482 self._match_text_seq("VALUE") 5483 value = self._parse_bitwise() 5484 5485 if not key and not value: 5486 return None 5487 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5488 5489 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5490 if not this or not self._match_text_seq("FORMAT", "JSON"): 5491 return this 5492 5493 return self.expression(exp.FormatJson, this=this) 5494 5495 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5496 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5497 for value in values: 5498 if self._match_text_seq(value, "ON", on): 5499 return f"{value} ON {on}" 5500 5501 return None 5502 5503 @t.overload 5504 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5505 5506 @t.overload 5507 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5508 5509 def _parse_json_object(self, agg=False): 5510 star = self._parse_star() 5511 expressions = ( 5512 [star] 5513 if star 5514 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5515 ) 5516 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5517 5518 unique_keys = None 5519 if self._match_text_seq("WITH", "UNIQUE"): 5520 unique_keys = True 5521 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5522 unique_keys = False 5523 5524 self._match_text_seq("KEYS") 5525 5526 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5527 self._parse_type() 5528 ) 5529 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5530 5531 return self.expression( 5532 exp.JSONObjectAgg if agg else exp.JSONObject, 5533 expressions=expressions, 5534 null_handling=null_handling, 5535 unique_keys=unique_keys, 5536 return_type=return_type, 5537 encoding=encoding, 5538 ) 5539 5540 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5541 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5542 if not self._match_text_seq("NESTED"): 5543 this = self._parse_id_var() 5544 kind = self._parse_types(allow_identifiers=False) 5545 nested = None 5546 else: 5547 this = None 5548 kind = None 5549 nested = True 5550 5551 path = self._match_text_seq("PATH") and self._parse_string() 5552 nested_schema = nested and self._parse_json_schema() 5553 5554 return self.expression( 5555 exp.JSONColumnDef, 5556 this=this, 5557 kind=kind, 5558 path=path, 5559 nested_schema=nested_schema, 5560 ) 5561 5562 def _parse_json_schema(self) -> exp.JSONSchema: 5563 self._match_text_seq("COLUMNS") 5564 return self.expression( 5565 exp.JSONSchema, 5566 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5567 ) 5568 5569 def _parse_json_table(self) -> exp.JSONTable: 5570 this = self._parse_format_json(self._parse_bitwise()) 5571 path = self._match(TokenType.COMMA) and self._parse_string() 5572 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5573 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5574 schema = self._parse_json_schema() 5575 5576 return exp.JSONTable( 5577 this=this, 5578 schema=schema, 5579 path=path, 5580 error_handling=error_handling, 5581 empty_handling=empty_handling, 5582 ) 5583 5584 def _parse_match_against(self) -> exp.MatchAgainst: 5585 expressions = self._parse_csv(self._parse_column) 5586 5587 self._match_text_seq(")", "AGAINST", "(") 5588 5589 this = self._parse_string() 5590 5591 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5592 modifier = "IN NATURAL LANGUAGE MODE" 5593 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5594 modifier = f"{modifier} WITH QUERY EXPANSION" 5595 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5596 modifier = "IN BOOLEAN MODE" 5597 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5598 modifier = "WITH QUERY EXPANSION" 5599 else: 5600 modifier = None 5601 5602 return self.expression( 5603 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5604 ) 5605 5606 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5607 def _parse_open_json(self) -> exp.OpenJSON: 5608 this = self._parse_bitwise() 5609 path = self._match(TokenType.COMMA) and self._parse_string() 5610 5611 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5612 this = self._parse_field(any_token=True) 5613 kind = self._parse_types() 5614 path = 
self._parse_string() 5615 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5616 5617 return self.expression( 5618 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5619 ) 5620 5621 expressions = None 5622 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5623 self._match_l_paren() 5624 expressions = self._parse_csv(_parse_open_json_column_def) 5625 5626 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5627 5628 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5629 args = self._parse_csv(self._parse_bitwise) 5630 5631 if self._match(TokenType.IN): 5632 return self.expression( 5633 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5634 ) 5635 5636 if haystack_first: 5637 haystack = seq_get(args, 0) 5638 needle = seq_get(args, 1) 5639 else: 5640 needle = seq_get(args, 0) 5641 haystack = seq_get(args, 1) 5642 5643 return self.expression( 5644 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5645 ) 5646 5647 def _parse_predict(self) -> exp.Predict: 5648 self._match_text_seq("MODEL") 5649 this = self._parse_table() 5650 5651 self._match(TokenType.COMMA) 5652 self._match_text_seq("TABLE") 5653 5654 return self.expression( 5655 exp.Predict, 5656 this=this, 5657 expression=self._parse_table(), 5658 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5659 ) 5660 5661 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5662 args = self._parse_csv(self._parse_table) 5663 return exp.JoinHint(this=func_name.upper(), expressions=args) 5664 5665 def _parse_substring(self) -> exp.Substring: 5666 # Postgres supports the form: substring(string [from int] [for int]) 5667 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5668 5669 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5670 5671 if self._match(TokenType.FROM): 5672 args.append(self._parse_bitwise()) 5673 if self._match(TokenType.FOR): 5674 if len(args) == 1: 5675 args.append(exp.Literal.number(1)) 5676 args.append(self._parse_bitwise()) 5677 5678 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5679 5680 def _parse_trim(self) -> exp.Trim: 5681 # https://www.w3resource.com/sql/character-functions/trim.php 5682 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5683 5684 position = None 5685 collation = None 5686 expression = None 5687 5688 if self._match_texts(self.TRIM_TYPES): 5689 position = self._prev.text.upper() 5690 5691 this = self._parse_bitwise() 5692 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5693 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5694 expression = self._parse_bitwise() 5695 5696 if invert_order: 5697 this, expression = expression, this 5698 5699 if self._match(TokenType.COLLATE): 5700 collation = self._parse_bitwise() 5701 5702 return self.expression( 5703 exp.Trim, this=this, position=position, expression=expression, collation=collation 5704 ) 5705 5706 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5707 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5708 5709 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5710 return self._parse_window(self._parse_id_var(), alias=True) 5711 5712 def _parse_respect_or_ignore_nulls( 5713 self, this: t.Optional[exp.Expression] 5714 ) -> t.Optional[exp.Expression]: 5715 if self._match_text_seq("IGNORE", "NULLS"): 
5716 return self.expression(exp.IgnoreNulls, this=this) 5717 if self._match_text_seq("RESPECT", "NULLS"): 5718 return self.expression(exp.RespectNulls, this=this) 5719 return this 5720 5721 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5722 if self._match(TokenType.HAVING): 5723 self._match_texts(("MAX", "MIN")) 5724 max = self._prev.text.upper() != "MIN" 5725 return self.expression( 5726 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5727 ) 5728 5729 return this 5730 5731 def _parse_window( 5732 self, this: t.Optional[exp.Expression], alias: bool = False 5733 ) -> t.Optional[exp.Expression]: 5734 func = this 5735 comments = func.comments if isinstance(func, exp.Expression) else None 5736 5737 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5738 self._match(TokenType.WHERE) 5739 this = self.expression( 5740 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5741 ) 5742 self._match_r_paren() 5743 5744 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5745 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5746 if self._match_text_seq("WITHIN", "GROUP"): 5747 order = self._parse_wrapped(self._parse_order) 5748 this = self.expression(exp.WithinGroup, this=this, expression=order) 5749 5750 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5751 # Some dialects choose to implement and some do not. 5752 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5753 5754 # There is some code above in _parse_lambda that handles 5755 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5756 5757 # The below changes handle 5758 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5759 5760 # Oracle allows both formats 5761 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5762 # and Snowflake chose to do the same for familiarity 5763 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5764 if isinstance(this, exp.AggFunc): 5765 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5766 5767 if ignore_respect and ignore_respect is not this: 5768 ignore_respect.replace(ignore_respect.this) 5769 this = self.expression(ignore_respect.__class__, this=this) 5770 5771 this = self._parse_respect_or_ignore_nulls(this) 5772 5773 # bigquery select from window x AS (partition by ...) 
5774 if alias: 5775 over = None 5776 self._match(TokenType.ALIAS) 5777 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5778 return this 5779 else: 5780 over = self._prev.text.upper() 5781 5782 if comments and isinstance(func, exp.Expression): 5783 func.pop_comments() 5784 5785 if not self._match(TokenType.L_PAREN): 5786 return self.expression( 5787 exp.Window, 5788 comments=comments, 5789 this=this, 5790 alias=self._parse_id_var(False), 5791 over=over, 5792 ) 5793 5794 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5795 5796 first = self._match(TokenType.FIRST) 5797 if self._match_text_seq("LAST"): 5798 first = False 5799 5800 partition, order = self._parse_partition_and_order() 5801 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5802 5803 if kind: 5804 self._match(TokenType.BETWEEN) 5805 start = self._parse_window_spec() 5806 self._match(TokenType.AND) 5807 end = self._parse_window_spec() 5808 5809 spec = self.expression( 5810 exp.WindowSpec, 5811 kind=kind, 5812 start=start["value"], 5813 start_side=start["side"], 5814 end=end["value"], 5815 end_side=end["side"], 5816 ) 5817 else: 5818 spec = None 5819 5820 self._match_r_paren() 5821 5822 window = self.expression( 5823 exp.Window, 5824 comments=comments, 5825 this=this, 5826 partition_by=partition, 5827 order=order, 5828 spec=spec, 5829 alias=window_alias, 5830 over=over, 5831 first=first, 5832 ) 5833 5834 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5835 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5836 return self._parse_window(window, alias=alias) 5837 5838 return window 5839 5840 def _parse_partition_and_order( 5841 self, 5842 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5843 return self._parse_partition_by(), self._parse_order() 5844 5845 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5846 self._match(TokenType.BETWEEN) 5847 5848 return { 5849 "value": ( 5850 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5851 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5852 or self._parse_bitwise() 5853 ), 5854 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5855 } 5856 5857 def _parse_alias( 5858 self, this: t.Optional[exp.Expression], explicit: bool = False 5859 ) -> t.Optional[exp.Expression]: 5860 any_token = self._match(TokenType.ALIAS) 5861 comments = self._prev_comments or [] 5862 5863 if explicit and not any_token: 5864 return this 5865 5866 if self._match(TokenType.L_PAREN): 5867 aliases = self.expression( 5868 exp.Aliases, 5869 comments=comments, 5870 this=this, 5871 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5872 ) 5873 self._match_r_paren(aliases) 5874 return aliases 5875 5876 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5877 self.STRING_ALIASES and self._parse_string_as_identifier() 5878 ) 5879 5880 if alias: 5881 comments.extend(alias.pop_comments()) 5882 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5883 column = this.this 5884 5885 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5886 if not this.comments and column and column.comments: 5887 this.comments = column.pop_comments() 5888 5889 return this 5890 5891 def _parse_id_var( 5892 self, 5893 any_token: bool = True, 5894 tokens: t.Optional[t.Collection[TokenType]] = None, 5895 ) -> t.Optional[exp.Expression]: 5896 expression = self._parse_identifier() 5897 if 
not expression and ( 5898 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5899 ): 5900 quoted = self._prev.token_type == TokenType.STRING 5901 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5902 5903 return expression 5904 5905 def _parse_string(self) -> t.Optional[exp.Expression]: 5906 if self._match_set(self.STRING_PARSERS): 5907 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5908 return self._parse_placeholder() 5909 5910 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5911 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5912 5913 def _parse_number(self) -> t.Optional[exp.Expression]: 5914 if self._match_set(self.NUMERIC_PARSERS): 5915 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5916 return self._parse_placeholder() 5917 5918 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5919 if self._match(TokenType.IDENTIFIER): 5920 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5921 return self._parse_placeholder() 5922 5923 def _parse_var( 5924 self, 5925 any_token: bool = False, 5926 tokens: t.Optional[t.Collection[TokenType]] = None, 5927 upper: bool = False, 5928 ) -> t.Optional[exp.Expression]: 5929 if ( 5930 (any_token and self._advance_any()) 5931 or self._match(TokenType.VAR) 5932 or (self._match_set(tokens) if tokens else False) 5933 ): 5934 return self.expression( 5935 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5936 ) 5937 return self._parse_placeholder() 5938 5939 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5940 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5941 self._advance() 5942 return self._prev 5943 return None 5944 5945 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5946 return self._parse_var() or self._parse_string() 5947 5948 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5949 return self._parse_primary() or self._parse_var(any_token=True) 5950 5951 def _parse_null(self) -> t.Optional[exp.Expression]: 5952 if self._match_set(self.NULL_TOKENS): 5953 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5954 return self._parse_placeholder() 5955 5956 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5957 if self._match(TokenType.TRUE): 5958 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5959 if self._match(TokenType.FALSE): 5960 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5961 return self._parse_placeholder() 5962 5963 def _parse_star(self) -> t.Optional[exp.Expression]: 5964 if self._match(TokenType.STAR): 5965 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5966 return self._parse_placeholder() 5967 5968 def _parse_parameter(self) -> exp.Parameter: 5969 this = self._parse_identifier() or self._parse_primary_or_var() 5970 return self.expression(exp.Parameter, this=this) 5971 5972 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5973 if self._match_set(self.PLACEHOLDER_PARSERS): 5974 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5975 if placeholder: 5976 return placeholder 5977 self._advance(-1) 5978 return None 5979 5980 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5981 if not self._match_texts(keywords): 5982 return None 5983 if self._match(TokenType.L_PAREN, advance=False): 5984 return 
self._parse_wrapped_csv(self._parse_expression) 5985 5986 expression = self._parse_expression() 5987 return [expression] if expression else None 5988 5989 def _parse_csv( 5990 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5991 ) -> t.List[exp.Expression]: 5992 parse_result = parse_method() 5993 items = [parse_result] if parse_result is not None else [] 5994 5995 while self._match(sep): 5996 self._add_comments(parse_result) 5997 parse_result = parse_method() 5998 if parse_result is not None: 5999 items.append(parse_result) 6000 6001 return items 6002 6003 def _parse_tokens( 6004 self, parse_method: t.Callable, expressions: t.Dict 6005 ) -> t.Optional[exp.Expression]: 6006 this = parse_method() 6007 6008 while self._match_set(expressions): 6009 this = self.expression( 6010 expressions[self._prev.token_type], 6011 this=this, 6012 comments=self._prev_comments, 6013 expression=parse_method(), 6014 ) 6015 6016 return this 6017 6018 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6019 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6020 6021 def _parse_wrapped_csv( 6022 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6023 ) -> t.List[exp.Expression]: 6024 return self._parse_wrapped( 6025 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6026 ) 6027 6028 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6029 wrapped = self._match(TokenType.L_PAREN) 6030 if not wrapped and not optional: 6031 self.raise_error("Expecting (") 6032 parse_result = parse_method() 6033 if wrapped: 6034 self._match_r_paren() 6035 return parse_result 6036 6037 def _parse_expressions(self) -> t.List[exp.Expression]: 6038 return self._parse_csv(self._parse_expression) 6039 6040 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6041 return self._parse_select() or self._parse_set_operations( 6042 self._parse_expression() if alias else self._parse_assignment() 6043 ) 6044 6045 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6046 return self._parse_query_modifiers( 6047 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6048 ) 6049 6050 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6051 this = None 6052 if self._match_texts(self.TRANSACTION_KIND): 6053 this = self._prev.text 6054 6055 self._match_texts(("TRANSACTION", "WORK")) 6056 6057 modes = [] 6058 while True: 6059 mode = [] 6060 while self._match(TokenType.VAR): 6061 mode.append(self._prev.text) 6062 6063 if mode: 6064 modes.append(" ".join(mode)) 6065 if not self._match(TokenType.COMMA): 6066 break 6067 6068 return self.expression(exp.Transaction, this=this, modes=modes) 6069 6070 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6071 chain = None 6072 savepoint = None 6073 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6074 6075 self._match_texts(("TRANSACTION", "WORK")) 6076 6077 if self._match_text_seq("TO"): 6078 self._match_text_seq("SAVEPOINT") 6079 savepoint = self._parse_id_var() 6080 6081 if self._match(TokenType.AND): 6082 chain = not self._match_text_seq("NO") 6083 self._match_text_seq("CHAIN") 6084 6085 if is_rollback: 6086 return self.expression(exp.Rollback, savepoint=savepoint) 6087 6088 return self.expression(exp.Commit, chain=chain) 6089 6090 def _parse_refresh(self) -> exp.Refresh: 6091 self._match(TokenType.TABLE) 6092 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6093 6094 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6095 if not self._match_text_seq("ADD"): 6096 return None 6097 6098 self._match(TokenType.COLUMN) 6099 exists_column = self._parse_exists(not_=True) 6100 expression = self._parse_field_def() 6101 6102 if expression: 6103 expression.set("exists", exists_column) 6104 6105 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6106 if self._match_texts(("FIRST", "AFTER")): 6107 position = self._prev.text 6108 column_position = self.expression( 6109 exp.ColumnPosition, this=self._parse_column(), position=position 6110 ) 6111 expression.set("position", column_position) 6112 6113 return expression 6114 6115 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6116 drop = self._match(TokenType.DROP) and self._parse_drop() 6117 if drop and not isinstance(drop, exp.Command): 6118 drop.set("kind", drop.args.get("kind", "COLUMN")) 6119 return drop 6120 6121 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6122 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6123 return self.expression( 6124 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6125 ) 6126 6127 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6128 index = self._index - 1 6129 6130 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6131 return self._parse_csv( 6132 lambda: self.expression( 6133 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6134 ) 6135 ) 6136 6137 self._retreat(index) 6138 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6139 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6140 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6141 6142 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6143 if self._match_texts(self.ALTER_ALTER_PARSERS): 6144 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6145 6146 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6147 # keyword after ALTER we default to parsing this statement 6148 self._match(TokenType.COLUMN) 6149 column = self._parse_field(any_token=True) 6150 6151 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6152 return self.expression(exp.AlterColumn, this=column, drop=True) 6153 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6154 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6155 if self._match(TokenType.COMMENT): 6156 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6157 if self._match_text_seq("DROP", "NOT", "NULL"): 6158 return self.expression( 6159 exp.AlterColumn, 6160 this=column, 6161 drop=True, 6162 allow_null=True, 6163 ) 6164 if self._match_text_seq("SET", "NOT", "NULL"): 6165 return self.expression( 6166 exp.AlterColumn, 6167 this=column, 6168 allow_null=False, 6169 ) 6170 self._match_text_seq("SET", "DATA") 6171 self._match_text_seq("TYPE") 6172 return self.expression( 6173 exp.AlterColumn, 6174 this=column, 6175 dtype=self._parse_types(), 6176 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6177 using=self._match(TokenType.USING) and self._parse_assignment(), 6178 ) 6179 6180 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6181 if self._match_texts(("ALL", "EVEN", "AUTO")): 6182 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6183 6184 self._match_text_seq("KEY", "DISTKEY") 6185 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6186 6187 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6188 if compound: 6189 self._match_text_seq("SORTKEY") 6190 6191 if self._match(TokenType.L_PAREN, advance=False): 6192 return self.expression( 6193 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6194 ) 6195 6196 self._match_texts(("AUTO", "NONE")) 6197 return self.expression( 6198 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6199 ) 6200 6201 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6202 index = self._index - 1 6203 6204 partition_exists = self._parse_exists() 6205 if self._match(TokenType.PARTITION, advance=False): 6206 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6207 6208 self._retreat(index) 6209 return self._parse_csv(self._parse_drop_column) 6210 6211 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6212 if self._match(TokenType.COLUMN): 6213 exists = self._parse_exists() 6214 old_column = self._parse_column() 6215 to = self._match_text_seq("TO") 6216 new_column = self._parse_column() 6217 6218 if old_column is None or to is None or new_column is None: 6219 return None 6220 6221 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6222 6223 self._match_text_seq("TO") 6224 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6225 6226 def _parse_alter_table_set(self) -> exp.AlterSet: 6227 alter_set = self.expression(exp.AlterSet) 6228 6229 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6230 "TABLE", "PROPERTIES" 6231 ): 6232 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6233 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6234 alter_set.set("expressions", [self._parse_assignment()]) 6235 elif self._match_texts(("LOGGED", "UNLOGGED")): 6236 alter_set.set("option", exp.var(self._prev.text.upper())) 6237 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6238 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6239 elif self._match_text_seq("LOCATION"): 6240 alter_set.set("location", self._parse_field()) 6241 elif self._match_text_seq("ACCESS", "METHOD"): 6242 alter_set.set("access_method", self._parse_field()) 6243 elif self._match_text_seq("TABLESPACE"): 6244 alter_set.set("tablespace", self._parse_field()) 6245 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6246 alter_set.set("file_format", [self._parse_field()]) 6247 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6248 alter_set.set("file_format", self._parse_wrapped_options()) 6249 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6250 alter_set.set("copy_options", self._parse_wrapped_options()) 6251 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6252 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6253 else: 6254 if self._match_text_seq("SERDE"): 6255 alter_set.set("serde", self._parse_field()) 6256 6257 alter_set.set("expressions", [self._parse_properties()]) 6258 6259 return alter_set 6260 6261 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6262 start = self._prev 6263 6264 if not self._match(TokenType.TABLE): 6265 return 
self._parse_as_command(start) 6266 6267 exists = self._parse_exists() 6268 only = self._match_text_seq("ONLY") 6269 this = self._parse_table(schema=True) 6270 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6271 6272 if self._next: 6273 self._advance() 6274 6275 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6276 if parser: 6277 actions = ensure_list(parser(self)) 6278 options = self._parse_csv(self._parse_property) 6279 6280 if not self._curr and actions: 6281 return self.expression( 6282 exp.AlterTable, 6283 this=this, 6284 exists=exists, 6285 actions=actions, 6286 only=only, 6287 options=options, 6288 cluster=cluster, 6289 ) 6290 6291 return self._parse_as_command(start) 6292 6293 def _parse_merge(self) -> exp.Merge: 6294 self._match(TokenType.INTO) 6295 target = self._parse_table() 6296 6297 if target and self._match(TokenType.ALIAS, advance=False): 6298 target.set("alias", self._parse_table_alias()) 6299 6300 self._match(TokenType.USING) 6301 using = self._parse_table() 6302 6303 self._match(TokenType.ON) 6304 on = self._parse_assignment() 6305 6306 return self.expression( 6307 exp.Merge, 6308 this=target, 6309 using=using, 6310 on=on, 6311 expressions=self._parse_when_matched(), 6312 ) 6313 6314 def _parse_when_matched(self) -> t.List[exp.When]: 6315 whens = [] 6316 6317 while self._match(TokenType.WHEN): 6318 matched = not self._match(TokenType.NOT) 6319 self._match_text_seq("MATCHED") 6320 source = ( 6321 False 6322 if self._match_text_seq("BY", "TARGET") 6323 else self._match_text_seq("BY", "SOURCE") 6324 ) 6325 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6326 6327 self._match(TokenType.THEN) 6328 6329 if self._match(TokenType.INSERT): 6330 _this = self._parse_star() 6331 if _this: 6332 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6333 else: 6334 then = self.expression( 6335 exp.Insert, 6336 this=self._parse_value(), 6337 expression=self._match_text_seq("VALUES") and self._parse_value(), 6338 ) 6339 elif self._match(TokenType.UPDATE): 6340 expressions = self._parse_star() 6341 if expressions: 6342 then = self.expression(exp.Update, expressions=expressions) 6343 else: 6344 then = self.expression( 6345 exp.Update, 6346 expressions=self._match(TokenType.SET) 6347 and self._parse_csv(self._parse_equality), 6348 ) 6349 elif self._match(TokenType.DELETE): 6350 then = self.expression(exp.Var, this=self._prev.text) 6351 else: 6352 then = None 6353 6354 whens.append( 6355 self.expression( 6356 exp.When, 6357 matched=matched, 6358 source=source, 6359 condition=condition, 6360 then=then, 6361 ) 6362 ) 6363 return whens 6364 6365 def _parse_show(self) -> t.Optional[exp.Expression]: 6366 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6367 if parser: 6368 return parser(self) 6369 return self._parse_as_command(self._prev) 6370 6371 def _parse_set_item_assignment( 6372 self, kind: t.Optional[str] = None 6373 ) -> t.Optional[exp.Expression]: 6374 index = self._index 6375 6376 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6377 return self._parse_set_transaction(global_=kind == "GLOBAL") 6378 6379 left = self._parse_primary() or self._parse_column() 6380 assignment_delimiter = self._match_texts(("=", "TO")) 6381 6382 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6383 self._retreat(index) 6384 return None 6385 6386 right = self._parse_statement() or self._parse_id_var() 6387 if isinstance(right, 
(exp.Column, exp.Identifier)): 6388 right = exp.var(right.name) 6389 6390 this = self.expression(exp.EQ, this=left, expression=right) 6391 return self.expression(exp.SetItem, this=this, kind=kind) 6392 6393 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6394 self._match_text_seq("TRANSACTION") 6395 characteristics = self._parse_csv( 6396 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6397 ) 6398 return self.expression( 6399 exp.SetItem, 6400 expressions=characteristics, 6401 kind="TRANSACTION", 6402 **{"global": global_}, # type: ignore 6403 ) 6404 6405 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6406 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6407 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6408 6409 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6410 index = self._index 6411 set_ = self.expression( 6412 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6413 ) 6414 6415 if self._curr: 6416 self._retreat(index) 6417 return self._parse_as_command(self._prev) 6418 6419 return set_ 6420 6421 def _parse_var_from_options( 6422 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6423 ) -> t.Optional[exp.Var]: 6424 start = self._curr 6425 if not start: 6426 return None 6427 6428 option = start.text.upper() 6429 continuations = options.get(option) 6430 6431 index = self._index 6432 self._advance() 6433 for keywords in continuations or []: 6434 if isinstance(keywords, str): 6435 keywords = (keywords,) 6436 6437 if self._match_text_seq(*keywords): 6438 option = f"{option} {' '.join(keywords)}" 6439 break 6440 else: 6441 if continuations or continuations is None: 6442 if raise_unmatched: 6443 self.raise_error(f"Unknown option {option}") 6444 6445 self._retreat(index) 6446 return None 6447 6448 return exp.var(option) 6449 6450 def _parse_as_command(self, start: Token) -> exp.Command: 6451 while self._curr: 6452 self._advance() 6453 text = self._find_sql(start, self._prev) 6454 size = len(start.text) 6455 self._warn_unsupported() 6456 return exp.Command(this=text[:size], expression=text[size:]) 6457 6458 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6459 settings = [] 6460 6461 self._match_l_paren() 6462 kind = self._parse_id_var() 6463 6464 if self._match(TokenType.L_PAREN): 6465 while True: 6466 key = self._parse_id_var() 6467 value = self._parse_primary() 6468 6469 if not key and value is None: 6470 break 6471 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6472 self._match(TokenType.R_PAREN) 6473 6474 self._match_r_paren() 6475 6476 return self.expression( 6477 exp.DictProperty, 6478 this=this, 6479 kind=kind.this if kind else None, 6480 settings=settings, 6481 ) 6482 6483 def _parse_dict_range(self, this: str) -> exp.DictRange: 6484 self._match_l_paren() 6485 has_min = self._match_text_seq("MIN") 6486 if has_min: 6487 min = self._parse_var() or self._parse_primary() 6488 self._match_text_seq("MAX") 6489 max = self._parse_var() or self._parse_primary() 6490 else: 6491 max = self._parse_var() or self._parse_primary() 6492 min = exp.Literal.number(0) 6493 self._match_r_paren() 6494 return self.expression(exp.DictRange, this=this, min=min, max=max) 6495 6496 def _parse_comprehension( 6497 self, this: t.Optional[exp.Expression] 6498 ) -> t.Optional[exp.Comprehension]: 6499 index = self._index 6500 expression = self._parse_column() 6501 if not 
self._match(TokenType.IN): 6502 self._retreat(index - 1) 6503 return None 6504 iterator = self._parse_column() 6505 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6506 return self.expression( 6507 exp.Comprehension, 6508 this=this, 6509 expression=expression, 6510 iterator=iterator, 6511 condition=condition, 6512 ) 6513 6514 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6515 if self._match(TokenType.HEREDOC_STRING): 6516 return self.expression(exp.Heredoc, this=self._prev.text) 6517 6518 if not self._match_text_seq("$"): 6519 return None 6520 6521 tags = ["$"] 6522 tag_text = None 6523 6524 if self._is_connected(): 6525 self._advance() 6526 tags.append(self._prev.text.upper()) 6527 else: 6528 self.raise_error("No closing $ found") 6529 6530 if tags[-1] != "$": 6531 if self._is_connected() and self._match_text_seq("$"): 6532 tag_text = tags[-1] 6533 tags.append("$") 6534 else: 6535 self.raise_error("No closing $ found") 6536 6537 heredoc_start = self._curr 6538 6539 while self._curr: 6540 if self._match_text_seq(*tags, advance=False): 6541 this = self._find_sql(heredoc_start, self._prev) 6542 self._advance(len(tags)) 6543 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6544 6545 self._advance() 6546 6547 self.raise_error(f"No closing {''.join(tags)} found") 6548 return None 6549 6550 def _find_parser( 6551 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6552 ) -> t.Optional[t.Callable]: 6553 if not self._curr: 6554 return None 6555 6556 index = self._index 6557 this = [] 6558 while True: 6559 # The current token might be multiple words 6560 curr = self._curr.text.upper() 6561 key = curr.split(" ") 6562 this.append(curr) 6563 6564 self._advance() 6565 result, trie = in_trie(trie, key) 6566 if result == TrieResult.FAILED: 6567 break 6568 6569 if result == TrieResult.EXISTS: 6570 subparser = parsers[" ".join(this)] 6571 return subparser 6572 6573 self._retreat(index) 6574 return None 6575 6576 def _match(self, token_type, advance=True, expression=None): 6577 if not self._curr: 6578 return None 6579 6580 if self._curr.token_type == token_type: 6581 if advance: 6582 self._advance() 6583 self._add_comments(expression) 6584 return True 6585 6586 return None 6587 6588 def _match_set(self, types, advance=True): 6589 if not self._curr: 6590 return None 6591 6592 if self._curr.token_type in types: 6593 if advance: 6594 self._advance() 6595 return True 6596 6597 return None 6598 6599 def _match_pair(self, token_type_a, token_type_b, advance=True): 6600 if not self._curr or not self._next: 6601 return None 6602 6603 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6604 if advance: 6605 self._advance(2) 6606 return True 6607 6608 return None 6609 6610 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6611 if not self._match(TokenType.L_PAREN, expression=expression): 6612 self.raise_error("Expecting (") 6613 6614 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6615 if not self._match(TokenType.R_PAREN, expression=expression): 6616 self.raise_error("Expecting )") 6617 6618 def _match_texts(self, texts, advance=True): 6619 if self._curr and self._curr.text.upper() in texts: 6620 if advance: 6621 self._advance() 6622 return True 6623 return None 6624 6625 def _match_text_seq(self, *texts, advance=True): 6626 index = self._index 6627 for text in texts: 6628 if self._curr and self._curr.text.upper() == text: 6629 self._advance() 6630 else: 6631 
self._retreat(index) 6632 return None 6633 6634 if not advance: 6635 self._retreat(index) 6636 6637 return True 6638 6639 def _replace_lambda( 6640 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6641 ) -> t.Optional[exp.Expression]: 6642 if not node: 6643 return node 6644 6645 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6646 6647 for column in node.find_all(exp.Column): 6648 typ = lambda_types.get(column.parts[0].name) 6649 if typ is not None: 6650 dot_or_id = column.to_dot() if column.table else column.this 6651 6652 if typ: 6653 dot_or_id = self.expression( 6654 exp.Cast, 6655 this=dot_or_id, 6656 to=typ, 6657 ) 6658 6659 parent = column.parent 6660 6661 while isinstance(parent, exp.Dot): 6662 if not isinstance(parent.parent, exp.Dot): 6663 parent.replace(dot_or_id) 6664 break 6665 parent = parent.parent 6666 else: 6667 if column is node: 6668 node = dot_or_id 6669 else: 6670 column.replace(dot_or_id) 6671 return node 6672 6673 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6674 start = self._prev 6675 6676 # Not to be confused with TRUNCATE(number, decimals) function call 6677 if self._match(TokenType.L_PAREN): 6678 self._retreat(self._index - 2) 6679 return self._parse_function() 6680 6681 # Clickhouse supports TRUNCATE DATABASE as well 6682 is_database = self._match(TokenType.DATABASE) 6683 6684 self._match(TokenType.TABLE) 6685 6686 exists = self._parse_exists(not_=False) 6687 6688 expressions = self._parse_csv( 6689 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6690 ) 6691 6692 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6693 6694 if self._match_text_seq("RESTART", "IDENTITY"): 6695 identity = "RESTART" 6696 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6697 identity = "CONTINUE" 6698 else: 6699 identity = None 6700 6701 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6702 option = self._prev.text 6703 else: 6704 option = None 6705 6706 partition = self._parse_partition() 6707 6708 # Fallback case 6709 if self._curr: 6710 return self._parse_as_command(start) 6711 6712 return self.expression( 6713 exp.TruncateTable, 6714 expressions=expressions, 6715 is_database=is_database, 6716 exists=exists, 6717 cluster=cluster, 6718 identity=identity, 6719 option=option, 6720 partition=partition, 6721 ) 6722 6723 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6724 this = self._parse_ordered(self._parse_opclass) 6725 6726 if not self._match(TokenType.WITH): 6727 return this 6728 6729 op = self._parse_var(any_token=True) 6730 6731 return self.expression(exp.WithOperator, this=this, op=op) 6732 6733 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6734 self._match(TokenType.EQ) 6735 self._match(TokenType.L_PAREN) 6736 6737 opts: t.List[t.Optional[exp.Expression]] = [] 6738 while self._curr and not self._match(TokenType.R_PAREN): 6739 if self._match_text_seq("FORMAT_NAME", "="): 6740 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6741 # so we parse it separately to use _parse_field() 6742 prop = self.expression( 6743 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6744 ) 6745 opts.append(prop) 6746 else: 6747 opts.append(self._parse_property()) 6748 6749 self._match(TokenType.COMMA) 6750 6751 return opts 6752 6753 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6754 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6755 6756 options = [] 6757 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6758 option = self._parse_var(any_token=True) 6759 prev = self._prev.text.upper() 6760 6761 # Different dialects might separate options and values by white space, "=" and "AS" 6762 self._match(TokenType.EQ) 6763 self._match(TokenType.ALIAS) 6764 6765 param = self.expression(exp.CopyParameter, this=option) 6766 6767 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6768 TokenType.L_PAREN, advance=False 6769 ): 6770 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6771 param.set("expressions", self._parse_wrapped_options()) 6772 elif prev == "FILE_FORMAT": 6773 # T-SQL's external file format case 6774 param.set("expression", self._parse_field()) 6775 else: 6776 param.set("expression", self._parse_unquoted_field()) 6777 6778 options.append(param) 6779 self._match(sep) 6780 6781 return options 6782 6783 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6784 expr = self.expression(exp.Credentials) 6785 6786 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6787 expr.set("storage", self._parse_field()) 6788 if self._match_text_seq("CREDENTIALS"): 6789 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6790 creds = ( 6791 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6792 ) 6793 expr.set("credentials", creds) 6794 if self._match_text_seq("ENCRYPTION"): 6795 expr.set("encryption", self._parse_wrapped_options()) 6796 if self._match_text_seq("IAM_ROLE"): 6797 expr.set("iam_role", self._parse_field()) 6798 if self._match_text_seq("REGION"): 6799 expr.set("region", self._parse_field()) 6800 6801 return expr 6802 6803 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6804 return self._parse_field() 6805 6806 def _parse_copy(self) -> exp.Copy | exp.Command: 6807 start = self._prev 6808 6809 self._match(TokenType.INTO) 6810 6811 this = ( 6812 self._parse_select(nested=True, parse_subquery_alias=False) 6813 if self._match(TokenType.L_PAREN, advance=False) 6814 else self._parse_table(schema=True) 6815 ) 6816 6817 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6818 6819 files = self._parse_csv(self._parse_file_location) 6820 credentials = self._parse_credentials() 6821 6822 self._match_text_seq("WITH") 6823 6824 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6825 6826 # Fallback case 6827 if self._curr: 6828 return self._parse_as_command(start) 6829 6830 return self.expression( 6831 exp.Copy, 6832 this=this, 6833 kind=kind, 6834 credentials=credentials, 6835 files=files, 6836 params=params, 6837 )
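The parsing routines above are rarely driven by hand; the usual entry points are sqlglot.parse_one and sqlglot.transpile, which select the right Tokenizer and Parser for a dialect. The class can also be used directly. A minimal sketch using the public API:

from sqlglot import Parser, Tokenizer

# Tokenize and parse directly; parse() returns one (possibly None) expression
# per semicolon-separated statement in the input.
tokens = Tokenizer().tokenize("SELECT a FROM b")
expressions = Parser().parse(tokens)
print(expressions[0].sql())  # SELECT a FROM b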
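_parse_bracket above routes brace literals to exp.Struct (the DuckDB syntax linked in its comment) and square brackets to either exp.Array or an exp.Bracket subscript, shifting indices by the dialect's INDEX_OFFSET. A sketch of how those paths surface through parse_one, assuming the DuckDB dialect behaves as in recent releases:

import sqlglot
from sqlglot import exp

# A brace literal parses into an exp.Struct node
assert sqlglot.parse_one("SELECT {'a': 1, 'b': 2}", read="duckdb").find(exp.Struct)

# A bare bracket with nothing to index becomes an exp.Array ...
assert sqlglot.parse_one("SELECT [1, 2, 3]", read="duckdb").find(exp.Array)

# ... while one attached to a column becomes an exp.Bracket subscript
assert sqlglot.parse_one("SELECT xs[1] FROM t", read="duckdb").find(exp.Bracket)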
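_parse_case collects one exp.If per WHEN/THEN pair and stores the ELSE branch under the default arg, which the following sketch inspects:

import sqlglot
from sqlglot import exp

sql = "SELECT CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END FROM t"
case = sqlglot.parse_one(sql).find(exp.Case)

assert len(case.args["ifs"]) == 2  # one exp.If per WHEN/THEN pair
assert case.args["default"].name == "other"  # the ELSE branch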
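As the _parse_decode docstring says, the search/result variant of DECODE is always parsed into a CASE expression, with NULL searches turned into explicit IS NULL tests. A hedged transpilation sketch; the exact output text is indicative:

import sqlglot

sql = "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t"
print(sqlglot.transpile(sql, read="oracle")[0])
# e.g. SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END FROM t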
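_parse_string_agg folds the Postgres and BigQuery STRING_AGG shapes noted in its comments into a single exp.GroupConcat node, which is what makes round-tripping to MySQL-style GROUP_CONCAT straightforward. A sketch; the SEPARATOR spelling in the output is indicative:

import sqlglot

print(sqlglot.transpile("SELECT STRING_AGG(x, ', ') FROM t", read="postgres", write="mysql")[0])
# e.g. SELECT GROUP_CONCAT(x SEPARATOR ', ') FROM t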
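Per the comments in _parse_window, IGNORE or RESPECT NULLS may appear either inside the function call or between the call and OVER; both placements end up as an exp.IgnoreNulls / exp.RespectNulls wrapper around the aggregate. A sketch under that assumption:

import sqlglot
from sqlglot import exp

inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")

# Both placements normalize to the same wrapper node
assert inside.find(exp.IgnoreNulls) and outside.find(exp.IgnoreNulls)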
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "MOD": build_mod, 162 "TIME_TO_TIME_STR": lambda args: exp.Cast( 163 this=seq_get(args, 0), 164 to=exp.DataType(this=exp.DataType.Type.TEXT), 165 ), 166 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 167 this=exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 start=exp.Literal.number(1), 172 length=exp.Literal.number(10), 173 ), 174 "VAR_MAP": build_var_map, 175 "LOWER": build_lower, 176 "UPPER": build_upper, 177 "HEX": build_hex, 178 "TO_HEX": build_hex, 179 } 180 181 NO_PAREN_FUNCTIONS = { 182 TokenType.CURRENT_DATE: exp.CurrentDate, 183 TokenType.CURRENT_DATETIME: exp.CurrentDate, 184 TokenType.CURRENT_TIME: exp.CurrentTime, 185 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 186 TokenType.CURRENT_USER: exp.CurrentUser, 187 } 188 189 STRUCT_TYPE_TOKENS = { 190 TokenType.NESTED, 191 TokenType.OBJECT, 192 TokenType.STRUCT, 193 } 194 195 NESTED_TYPE_TOKENS = { 196 TokenType.ARRAY, 197 TokenType.LIST, 198 TokenType.LOWCARDINALITY, 199 TokenType.MAP, 200 TokenType.NULLABLE, 201 *STRUCT_TYPE_TOKENS, 202 } 203 204 ENUM_TYPE_TOKENS = { 205 TokenType.ENUM, 206 TokenType.ENUM8, 207 TokenType.ENUM16, 208 } 209 210 AGGREGATE_TYPE_TOKENS = { 211 TokenType.AGGREGATEFUNCTION, 212 TokenType.SIMPLEAGGREGATEFUNCTION, 213 } 214 215 TYPE_TOKENS = { 216 TokenType.BIT, 217 TokenType.BOOLEAN, 218 TokenType.TINYINT, 219 TokenType.UTINYINT, 220 TokenType.SMALLINT, 221 TokenType.USMALLINT, 222 TokenType.INT, 223 TokenType.UINT, 224 TokenType.BIGINT, 225 TokenType.UBIGINT, 226 TokenType.INT128, 227 TokenType.UINT128, 228 TokenType.INT256, 229 TokenType.UINT256, 230 TokenType.MEDIUMINT, 231 TokenType.UMEDIUMINT, 232 TokenType.FIXEDSTRING, 233 TokenType.FLOAT, 234 TokenType.DOUBLE, 235 TokenType.CHAR, 236 TokenType.NCHAR, 237 TokenType.VARCHAR, 238 TokenType.NVARCHAR, 239 TokenType.BPCHAR, 240 
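_parse_window_clause and the alias=True path of _parse_window handle named windows, so a query that defines a window and refers to it by name should yield two exp.Window nodes: the OVER reference and the definition. A sketch, assuming that node count holds in the release at hand:

import sqlglot
from sqlglot import exp

query = sqlglot.parse_one("SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y ORDER BY z)")
assert len(list(query.find_all(exp.Window))) == 2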
TokenType.TEXT, 241 TokenType.MEDIUMTEXT, 242 TokenType.LONGTEXT, 243 TokenType.MEDIUMBLOB, 244 TokenType.LONGBLOB, 245 TokenType.BINARY, 246 TokenType.VARBINARY, 247 TokenType.JSON, 248 TokenType.JSONB, 249 TokenType.INTERVAL, 250 TokenType.TINYBLOB, 251 TokenType.TINYTEXT, 252 TokenType.TIME, 253 TokenType.TIMETZ, 254 TokenType.TIMESTAMP, 255 TokenType.TIMESTAMP_S, 256 TokenType.TIMESTAMP_MS, 257 TokenType.TIMESTAMP_NS, 258 TokenType.TIMESTAMPTZ, 259 TokenType.TIMESTAMPLTZ, 260 TokenType.TIMESTAMPNTZ, 261 TokenType.DATETIME, 262 TokenType.DATETIME64, 263 TokenType.DATE, 264 TokenType.DATE32, 265 TokenType.INT4RANGE, 266 TokenType.INT4MULTIRANGE, 267 TokenType.INT8RANGE, 268 TokenType.INT8MULTIRANGE, 269 TokenType.NUMRANGE, 270 TokenType.NUMMULTIRANGE, 271 TokenType.TSRANGE, 272 TokenType.TSMULTIRANGE, 273 TokenType.TSTZRANGE, 274 TokenType.TSTZMULTIRANGE, 275 TokenType.DATERANGE, 276 TokenType.DATEMULTIRANGE, 277 TokenType.DECIMAL, 278 TokenType.UDECIMAL, 279 TokenType.BIGDECIMAL, 280 TokenType.UUID, 281 TokenType.GEOGRAPHY, 282 TokenType.GEOMETRY, 283 TokenType.HLLSKETCH, 284 TokenType.HSTORE, 285 TokenType.PSEUDO_TYPE, 286 TokenType.SUPER, 287 TokenType.SERIAL, 288 TokenType.SMALLSERIAL, 289 TokenType.BIGSERIAL, 290 TokenType.XML, 291 TokenType.YEAR, 292 TokenType.UNIQUEIDENTIFIER, 293 TokenType.USERDEFINED, 294 TokenType.MONEY, 295 TokenType.SMALLMONEY, 296 TokenType.ROWVERSION, 297 TokenType.IMAGE, 298 TokenType.VARIANT, 299 TokenType.OBJECT, 300 TokenType.OBJECT_IDENTIFIER, 301 TokenType.INET, 302 TokenType.IPADDRESS, 303 TokenType.IPPREFIX, 304 TokenType.IPV4, 305 TokenType.IPV6, 306 TokenType.UNKNOWN, 307 TokenType.NULL, 308 TokenType.NAME, 309 TokenType.TDIGEST, 310 *ENUM_TYPE_TOKENS, 311 *NESTED_TYPE_TOKENS, 312 *AGGREGATE_TYPE_TOKENS, 313 } 314 315 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 316 TokenType.BIGINT: TokenType.UBIGINT, 317 TokenType.INT: TokenType.UINT, 318 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 319 TokenType.SMALLINT: TokenType.USMALLINT, 320 TokenType.TINYINT: TokenType.UTINYINT, 321 TokenType.DECIMAL: TokenType.UDECIMAL, 322 } 323 324 SUBQUERY_PREDICATES = { 325 TokenType.ANY: exp.Any, 326 TokenType.ALL: exp.All, 327 TokenType.EXISTS: exp.Exists, 328 TokenType.SOME: exp.Any, 329 } 330 331 RESERVED_TOKENS = { 332 *Tokenizer.SINGLE_TOKENS.values(), 333 TokenType.SELECT, 334 } - {TokenType.IDENTIFIER} 335 336 DB_CREATABLES = { 337 TokenType.DATABASE, 338 TokenType.DICTIONARY, 339 TokenType.MODEL, 340 TokenType.SCHEMA, 341 TokenType.SEQUENCE, 342 TokenType.STORAGE_INTEGRATION, 343 TokenType.TABLE, 344 TokenType.TAG, 345 TokenType.VIEW, 346 TokenType.WAREHOUSE, 347 TokenType.STREAMLIT, 348 } 349 350 CREATABLES = { 351 TokenType.COLUMN, 352 TokenType.CONSTRAINT, 353 TokenType.FOREIGN_KEY, 354 TokenType.FUNCTION, 355 TokenType.INDEX, 356 TokenType.PROCEDURE, 357 *DB_CREATABLES, 358 } 359 360 # Tokens that can represent identifiers 361 ID_VAR_TOKENS = { 362 TokenType.VAR, 363 TokenType.ANTI, 364 TokenType.APPLY, 365 TokenType.ASC, 366 TokenType.ASOF, 367 TokenType.AUTO_INCREMENT, 368 TokenType.BEGIN, 369 TokenType.BPCHAR, 370 TokenType.CACHE, 371 TokenType.CASE, 372 TokenType.COLLATE, 373 TokenType.COMMAND, 374 TokenType.COMMENT, 375 TokenType.COMMIT, 376 TokenType.CONSTRAINT, 377 TokenType.COPY, 378 TokenType.DEFAULT, 379 TokenType.DELETE, 380 TokenType.DESC, 381 TokenType.DESCRIBE, 382 TokenType.DICTIONARY, 383 TokenType.DIV, 384 TokenType.END, 385 TokenType.EXECUTE, 386 TokenType.ESCAPE, 387 TokenType.FALSE, 388 TokenType.FIRST, 389 TokenType.FILTER, 390 TokenType.FINAL, 
391 TokenType.FORMAT, 392 TokenType.FULL, 393 TokenType.IDENTIFIER, 394 TokenType.IS, 395 TokenType.ISNULL, 396 TokenType.INTERVAL, 397 TokenType.KEEP, 398 TokenType.KILL, 399 TokenType.LEFT, 400 TokenType.LOAD, 401 TokenType.MERGE, 402 TokenType.NATURAL, 403 TokenType.NEXT, 404 TokenType.OFFSET, 405 TokenType.OPERATOR, 406 TokenType.ORDINALITY, 407 TokenType.OVERLAPS, 408 TokenType.OVERWRITE, 409 TokenType.PARTITION, 410 TokenType.PERCENT, 411 TokenType.PIVOT, 412 TokenType.PRAGMA, 413 TokenType.RANGE, 414 TokenType.RECURSIVE, 415 TokenType.REFERENCES, 416 TokenType.REFRESH, 417 TokenType.REPLACE, 418 TokenType.RIGHT, 419 TokenType.ROLLUP, 420 TokenType.ROW, 421 TokenType.ROWS, 422 TokenType.SEMI, 423 TokenType.SET, 424 TokenType.SETTINGS, 425 TokenType.SHOW, 426 TokenType.TEMPORARY, 427 TokenType.TOP, 428 TokenType.TRUE, 429 TokenType.TRUNCATE, 430 TokenType.UNIQUE, 431 TokenType.UNNEST, 432 TokenType.UNPIVOT, 433 TokenType.UPDATE, 434 TokenType.USE, 435 TokenType.VOLATILE, 436 TokenType.WINDOW, 437 *CREATABLES, 438 *SUBQUERY_PREDICATES, 439 *TYPE_TOKENS, 440 *NO_PAREN_FUNCTIONS, 441 } 442 443 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 444 445 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 446 TokenType.ANTI, 447 TokenType.APPLY, 448 TokenType.ASOF, 449 TokenType.FULL, 450 TokenType.LEFT, 451 TokenType.LOCK, 452 TokenType.NATURAL, 453 TokenType.OFFSET, 454 TokenType.RIGHT, 455 TokenType.SEMI, 456 TokenType.WINDOW, 457 } 458 459 ALIAS_TOKENS = ID_VAR_TOKENS 460 461 ARRAY_CONSTRUCTORS = { 462 "ARRAY": exp.Array, 463 "LIST": exp.List, 464 } 465 466 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 467 468 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 469 470 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 471 472 FUNC_TOKENS = { 473 TokenType.COLLATE, 474 TokenType.COMMAND, 475 TokenType.CURRENT_DATE, 476 TokenType.CURRENT_DATETIME, 477 TokenType.CURRENT_TIMESTAMP, 478 TokenType.CURRENT_TIME, 479 TokenType.CURRENT_USER, 480 TokenType.FILTER, 481 TokenType.FIRST, 482 TokenType.FORMAT, 483 TokenType.GLOB, 484 TokenType.IDENTIFIER, 485 TokenType.INDEX, 486 TokenType.ISNULL, 487 TokenType.ILIKE, 488 TokenType.INSERT, 489 TokenType.LIKE, 490 TokenType.MERGE, 491 TokenType.OFFSET, 492 TokenType.PRIMARY_KEY, 493 TokenType.RANGE, 494 TokenType.REPLACE, 495 TokenType.RLIKE, 496 TokenType.ROW, 497 TokenType.UNNEST, 498 TokenType.VAR, 499 TokenType.LEFT, 500 TokenType.RIGHT, 501 TokenType.SEQUENCE, 502 TokenType.DATE, 503 TokenType.DATETIME, 504 TokenType.TABLE, 505 TokenType.TIMESTAMP, 506 TokenType.TIMESTAMPTZ, 507 TokenType.TRUNCATE, 508 TokenType.WINDOW, 509 TokenType.XOR, 510 *TYPE_TOKENS, 511 *SUBQUERY_PREDICATES, 512 } 513 514 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 515 TokenType.AND: exp.And, 516 } 517 518 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 519 TokenType.COLON_EQ: exp.PropertyEQ, 520 } 521 522 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 523 TokenType.OR: exp.Or, 524 } 525 526 EQUALITY = { 527 TokenType.EQ: exp.EQ, 528 TokenType.NEQ: exp.NEQ, 529 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 530 } 531 532 COMPARISON = { 533 TokenType.GT: exp.GT, 534 TokenType.GTE: exp.GTE, 535 TokenType.LT: exp.LT, 536 TokenType.LTE: exp.LTE, 537 } 538 539 BITWISE = { 540 TokenType.AMP: exp.BitwiseAnd, 541 TokenType.CARET: exp.BitwiseXor, 542 TokenType.PIPE: exp.BitwiseOr, 543 } 544 545 TERM = { 546 TokenType.DASH: exp.Sub, 547 TokenType.PLUS: exp.Add, 548 TokenType.MOD: exp.Mod, 549 TokenType.COLLATE: exp.Collate, 550 } 551 552 FACTOR = { 
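# Multiplication-precedence operators: DIV parses to integer division, SLASH to regular division, and the <-> arrow (LR_ARROW) to a distance expression.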
553 TokenType.DIV: exp.IntDiv, 554 TokenType.LR_ARROW: exp.Distance, 555 TokenType.SLASH: exp.Div, 556 TokenType.STAR: exp.Mul, 557 } 558 559 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 560 561 TIMES = { 562 TokenType.TIME, 563 TokenType.TIMETZ, 564 } 565 566 TIMESTAMPS = { 567 TokenType.TIMESTAMP, 568 TokenType.TIMESTAMPTZ, 569 TokenType.TIMESTAMPLTZ, 570 *TIMES, 571 } 572 573 SET_OPERATIONS = { 574 TokenType.UNION, 575 TokenType.INTERSECT, 576 TokenType.EXCEPT, 577 } 578 579 JOIN_METHODS = { 580 TokenType.ASOF, 581 TokenType.NATURAL, 582 TokenType.POSITIONAL, 583 } 584 585 JOIN_SIDES = { 586 TokenType.LEFT, 587 TokenType.RIGHT, 588 TokenType.FULL, 589 } 590 591 JOIN_KINDS = { 592 TokenType.ANTI, 593 TokenType.CROSS, 594 TokenType.INNER, 595 TokenType.OUTER, 596 TokenType.SEMI, 597 TokenType.STRAIGHT_JOIN, 598 } 599 600 JOIN_HINTS: t.Set[str] = set() 601 602 LAMBDAS = { 603 TokenType.ARROW: lambda self, expressions: self.expression( 604 exp.Lambda, 605 this=self._replace_lambda( 606 self._parse_assignment(), 607 expressions, 608 ), 609 expressions=expressions, 610 ), 611 TokenType.FARROW: lambda self, expressions: self.expression( 612 exp.Kwarg, 613 this=exp.var(expressions[0].name), 614 expression=self._parse_assignment(), 615 ), 616 } 617 618 COLUMN_OPERATORS = { 619 TokenType.DOT: None, 620 TokenType.DCOLON: lambda self, this, to: self.expression( 621 exp.Cast if self.STRICT_CAST else exp.TryCast, 622 this=this, 623 to=to, 624 ), 625 TokenType.ARROW: lambda self, this, path: self.expression( 626 exp.JSONExtract, 627 this=this, 628 expression=self.dialect.to_json_path(path), 629 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 630 ), 631 TokenType.DARROW: lambda self, this, path: self.expression( 632 exp.JSONExtractScalar, 633 this=this, 634 expression=self.dialect.to_json_path(path), 635 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 636 ), 637 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 638 exp.JSONBExtract, 639 this=this, 640 expression=path, 641 ), 642 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 643 exp.JSONBExtractScalar, 644 this=this, 645 expression=path, 646 ), 647 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 648 exp.JSONBContains, 649 this=this, 650 expression=key, 651 ), 652 } 653 654 EXPRESSION_PARSERS = { 655 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 656 exp.Column: lambda self: self._parse_column(), 657 exp.Condition: lambda self: self._parse_assignment(), 658 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 659 exp.Expression: lambda self: self._parse_expression(), 660 exp.From: lambda self: self._parse_from(joins=True), 661 exp.Group: lambda self: self._parse_group(), 662 exp.Having: lambda self: self._parse_having(), 663 exp.Identifier: lambda self: self._parse_id_var(), 664 exp.Join: lambda self: self._parse_join(), 665 exp.Lambda: lambda self: self._parse_lambda(), 666 exp.Lateral: lambda self: self._parse_lateral(), 667 exp.Limit: lambda self: self._parse_limit(), 668 exp.Offset: lambda self: self._parse_offset(), 669 exp.Order: lambda self: self._parse_order(), 670 exp.Ordered: lambda self: self._parse_ordered(), 671 exp.Properties: lambda self: self._parse_properties(), 672 exp.Qualify: lambda self: self._parse_qualify(), 673 exp.Returning: lambda self: self._parse_returning(), 674 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 675 exp.Table: lambda self: self._parse_table_parts(), 676 exp.TableAlias: 
lambda self: self._parse_table_alias(), 677 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 678 exp.Where: lambda self: self._parse_where(), 679 exp.Window: lambda self: self._parse_named_window(), 680 exp.With: lambda self: self._parse_with(), 681 "JOIN_TYPE": lambda self: self._parse_join_parts(), 682 } 683 684 STATEMENT_PARSERS = { 685 TokenType.ALTER: lambda self: self._parse_alter(), 686 TokenType.BEGIN: lambda self: self._parse_transaction(), 687 TokenType.CACHE: lambda self: self._parse_cache(), 688 TokenType.COMMENT: lambda self: self._parse_comment(), 689 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 690 TokenType.COPY: lambda self: self._parse_copy(), 691 TokenType.CREATE: lambda self: self._parse_create(), 692 TokenType.DELETE: lambda self: self._parse_delete(), 693 TokenType.DESC: lambda self: self._parse_describe(), 694 TokenType.DESCRIBE: lambda self: self._parse_describe(), 695 TokenType.DROP: lambda self: self._parse_drop(), 696 TokenType.INSERT: lambda self: self._parse_insert(), 697 TokenType.KILL: lambda self: self._parse_kill(), 698 TokenType.LOAD: lambda self: self._parse_load(), 699 TokenType.MERGE: lambda self: self._parse_merge(), 700 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 701 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 702 TokenType.REFRESH: lambda self: self._parse_refresh(), 703 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 704 TokenType.SET: lambda self: self._parse_set(), 705 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 706 TokenType.UNCACHE: lambda self: self._parse_uncache(), 707 TokenType.UPDATE: lambda self: self._parse_update(), 708 TokenType.USE: lambda self: self.expression( 709 exp.Use, 710 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 711 this=self._parse_table(schema=False), 712 ), 713 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 714 } 715 716 UNARY_PARSERS = { 717 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 718 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 719 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 720 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 721 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 722 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 723 } 724 725 STRING_PARSERS = { 726 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 727 exp.RawString, this=token.text 728 ), 729 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 730 exp.National, this=token.text 731 ), 732 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 733 TokenType.STRING: lambda self, token: self.expression( 734 exp.Literal, this=token.text, is_string=True 735 ), 736 TokenType.UNICODE_STRING: lambda self, token: self.expression( 737 exp.UnicodeString, 738 this=token.text, 739 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 740 ), 741 } 742 743 NUMERIC_PARSERS = { 744 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 745 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 746 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 747 TokenType.NUMBER: lambda 
self, token: self.expression( 748 exp.Literal, this=token.text, is_string=False 749 ), 750 } 751 752 PRIMARY_PARSERS = { 753 **STRING_PARSERS, 754 **NUMERIC_PARSERS, 755 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 756 TokenType.NULL: lambda self, _: self.expression(exp.Null), 757 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 758 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 759 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 760 TokenType.STAR: lambda self, _: self.expression( 761 exp.Star, 762 **{ 763 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 764 "replace": self._parse_star_op("REPLACE"), 765 "rename": self._parse_star_op("RENAME"), 766 }, 767 ), 768 } 769 770 PLACEHOLDER_PARSERS = { 771 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 772 TokenType.PARAMETER: lambda self: self._parse_parameter(), 773 TokenType.COLON: lambda self: ( 774 self.expression(exp.Placeholder, this=self._prev.text) 775 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 776 else None 777 ), 778 } 779 780 RANGE_PARSERS = { 781 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 782 TokenType.GLOB: binary_range_parser(exp.Glob), 783 TokenType.ILIKE: binary_range_parser(exp.ILike), 784 TokenType.IN: lambda self, this: self._parse_in(this), 785 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 786 TokenType.IS: lambda self, this: self._parse_is(this), 787 TokenType.LIKE: binary_range_parser(exp.Like), 788 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 789 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 790 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 791 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 792 } 793 794 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 795 "ALLOWED_VALUES": lambda self: self.expression( 796 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 797 ), 798 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 799 "AUTO": lambda self: self._parse_auto_property(), 800 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 801 "BACKUP": lambda self: self.expression( 802 exp.BackupProperty, this=self._parse_var(any_token=True) 803 ), 804 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 805 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 806 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 807 "CHECKSUM": lambda self: self._parse_checksum(), 808 "CLUSTER BY": lambda self: self._parse_cluster(), 809 "CLUSTERED": lambda self: self._parse_clustered_by(), 810 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 811 exp.CollateProperty, **kwargs 812 ), 813 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 814 "CONTAINS": lambda self: self._parse_contains_property(), 815 "COPY": lambda self: self._parse_copy_property(), 816 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 817 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 818 "DEFINER": lambda self: self._parse_definer(), 819 "DETERMINISTIC": lambda self: self.expression( 820 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 821 ), 822 "DISTKEY": lambda self: self._parse_distkey(), 823 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 824 
"ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 825 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 826 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 827 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 828 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 829 "FREESPACE": lambda self: self._parse_freespace(), 830 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 831 "HEAP": lambda self: self.expression(exp.HeapProperty), 832 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 833 "IMMUTABLE": lambda self: self.expression( 834 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 835 ), 836 "INHERITS": lambda self: self.expression( 837 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 838 ), 839 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 840 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 841 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 842 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 843 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 844 "LIKE": lambda self: self._parse_create_like(), 845 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 846 "LOCK": lambda self: self._parse_locking(), 847 "LOCKING": lambda self: self._parse_locking(), 848 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 849 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 850 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 851 "MODIFIES": lambda self: self._parse_modifies_property(), 852 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 853 "NO": lambda self: self._parse_no_property(), 854 "ON": lambda self: self._parse_on_property(), 855 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 856 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 857 "PARTITION": lambda self: self._parse_partitioned_of(), 858 "PARTITION BY": lambda self: self._parse_partitioned_by(), 859 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 860 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 861 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 862 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 863 "READS": lambda self: self._parse_reads_property(), 864 "REMOTE": lambda self: self._parse_remote_with_connection(), 865 "RETURNS": lambda self: self._parse_returns(), 866 "STRICT": lambda self: self.expression(exp.StrictProperty), 867 "ROW": lambda self: self._parse_row(), 868 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 869 "SAMPLE": lambda self: self.expression( 870 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 871 ), 872 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 873 "SETTINGS": lambda self: self.expression( 874 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 875 ), 876 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 877 "SORTKEY": lambda self: self._parse_sortkey(), 878 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 879 "STABLE": lambda self: self.expression( 880 exp.StabilityProperty, this=exp.Literal.string("STABLE") 881 ), 882 "STORED": lambda 
self: self._parse_stored(), 883 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 884 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 885 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 886 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 887 "TO": lambda self: self._parse_to_table(), 888 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 889 "TRANSFORM": lambda self: self.expression( 890 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 891 ), 892 "TTL": lambda self: self._parse_ttl(), 893 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 894 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 895 "VOLATILE": lambda self: self._parse_volatile_property(), 896 "WITH": lambda self: self._parse_with_property(), 897 } 898 899 CONSTRAINT_PARSERS = { 900 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 901 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 902 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 903 "CHARACTER SET": lambda self: self.expression( 904 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 905 ), 906 "CHECK": lambda self: self.expression( 907 exp.CheckColumnConstraint, 908 this=self._parse_wrapped(self._parse_assignment), 909 enforced=self._match_text_seq("ENFORCED"), 910 ), 911 "COLLATE": lambda self: self.expression( 912 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 913 ), 914 "COMMENT": lambda self: self.expression( 915 exp.CommentColumnConstraint, this=self._parse_string() 916 ), 917 "COMPRESS": lambda self: self._parse_compress(), 918 "CLUSTERED": lambda self: self.expression( 919 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 920 ), 921 "NONCLUSTERED": lambda self: self.expression( 922 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 923 ), 924 "DEFAULT": lambda self: self.expression( 925 exp.DefaultColumnConstraint, this=self._parse_bitwise() 926 ), 927 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 928 "EPHEMERAL": lambda self: self.expression( 929 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 930 ), 931 "EXCLUDE": lambda self: self.expression( 932 exp.ExcludeColumnConstraint, this=self._parse_index_params() 933 ), 934 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 935 "FORMAT": lambda self: self.expression( 936 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 937 ), 938 "GENERATED": lambda self: self._parse_generated_as_identity(), 939 "IDENTITY": lambda self: self._parse_auto_increment(), 940 "INLINE": lambda self: self._parse_inline(), 941 "LIKE": lambda self: self._parse_create_like(), 942 "NOT": lambda self: self._parse_not_constraint(), 943 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 944 "ON": lambda self: ( 945 self._match(TokenType.UPDATE) 946 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 947 ) 948 or self.expression(exp.OnProperty, this=self._parse_id_var()), 949 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 950 "PERIOD": lambda self: self._parse_period_for_system_time(), 951 "PRIMARY KEY": lambda self: self._parse_primary_key(), 952 "REFERENCES": lambda self: self._parse_references(match=False), 953 "TITLE": lambda self: 
self.expression( 954 exp.TitleColumnConstraint, this=self._parse_var_or_string() 955 ), 956 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 957 "UNIQUE": lambda self: self._parse_unique(), 958 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 959 "WITH": lambda self: self.expression( 960 exp.Properties, expressions=self._parse_wrapped_properties() 961 ), 962 } 963 964 ALTER_PARSERS = { 965 "ADD": lambda self: self._parse_alter_table_add(), 966 "ALTER": lambda self: self._parse_alter_table_alter(), 967 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 968 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 969 "DROP": lambda self: self._parse_alter_table_drop(), 970 "RENAME": lambda self: self._parse_alter_table_rename(), 971 "SET": lambda self: self._parse_alter_table_set(), 972 } 973 974 ALTER_ALTER_PARSERS = { 975 "DISTKEY": lambda self: self._parse_alter_diststyle(), 976 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 977 "SORTKEY": lambda self: self._parse_alter_sortkey(), 978 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 979 } 980 981 SCHEMA_UNNAMED_CONSTRAINTS = { 982 "CHECK", 983 "EXCLUDE", 984 "FOREIGN KEY", 985 "LIKE", 986 "PERIOD", 987 "PRIMARY KEY", 988 "UNIQUE", 989 } 990 991 NO_PAREN_FUNCTION_PARSERS = { 992 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 993 "CASE": lambda self: self._parse_case(), 994 "IF": lambda self: self._parse_if(), 995 "NEXT": lambda self: self._parse_next_value_for(), 996 } 997 998 INVALID_FUNC_NAME_TOKENS = { 999 TokenType.IDENTIFIER, 1000 TokenType.STRING, 1001 } 1002 1003 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1004 1005 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1006 1007 FUNCTION_PARSERS = { 1008 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1009 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1010 "DECODE": lambda self: self._parse_decode(), 1011 "EXTRACT": lambda self: self._parse_extract(), 1012 "GAP_FILL": lambda self: self._parse_gap_fill(), 1013 "JSON_OBJECT": lambda self: self._parse_json_object(), 1014 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1015 "JSON_TABLE": lambda self: self._parse_json_table(), 1016 "MATCH": lambda self: self._parse_match_against(), 1017 "OPENJSON": lambda self: self._parse_open_json(), 1018 "POSITION": lambda self: self._parse_position(), 1019 "PREDICT": lambda self: self._parse_predict(), 1020 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1021 "STRING_AGG": lambda self: self._parse_string_agg(), 1022 "SUBSTRING": lambda self: self._parse_substring(), 1023 "TRIM": lambda self: self._parse_trim(), 1024 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1025 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1026 } 1027 1028 QUERY_MODIFIER_PARSERS = { 1029 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1030 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1031 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1032 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1033 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1034 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1035 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1036 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1037 
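# FETCH below fills the same "limit" slot as LIMIT, and USING the same "sample" slot as TABLE_SAMPLE, so each query ends up with one canonical modifier per slot.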
TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1038 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1039 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1040 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1041 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1042 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1043 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1044 TokenType.CLUSTER_BY: lambda self: ( 1045 "cluster", 1046 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1047 ), 1048 TokenType.DISTRIBUTE_BY: lambda self: ( 1049 "distribute", 1050 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1051 ), 1052 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1053 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1054 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1055 } 1056 1057 SET_PARSERS = { 1058 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1059 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1060 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1061 "TRANSACTION": lambda self: self._parse_set_transaction(), 1062 } 1063 1064 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1065 1066 TYPE_LITERAL_PARSERS = { 1067 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1068 } 1069 1070 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1071 1072 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1073 1074 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1075 1076 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1077 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1078 "ISOLATION": ( 1079 ("LEVEL", "REPEATABLE", "READ"), 1080 ("LEVEL", "READ", "COMMITTED"), 1081 ("LEVEL", "READ", "UNCOMMITTED"), 1082 ("LEVEL", "SERIALIZABLE"), 1083 ), 1084 "READ": ("WRITE", "ONLY"), 1085 } 1086 1087 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1088 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1089 ) 1090 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1091 1092 CREATE_SEQUENCE: OPTIONS_TYPE = { 1093 "SCALE": ("EXTEND", "NOEXTEND"), 1094 "SHARD": ("EXTEND", "NOEXTEND"), 1095 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1096 **dict.fromkeys( 1097 ( 1098 "SESSION", 1099 "GLOBAL", 1100 "KEEP", 1101 "NOKEEP", 1102 "ORDER", 1103 "NOORDER", 1104 "NOCACHE", 1105 "CYCLE", 1106 "NOCYCLE", 1107 "NOMINVALUE", 1108 "NOMAXVALUE", 1109 "NOSCALE", 1110 "NOSHARD", 1111 ), 1112 tuple(), 1113 ), 1114 } 1115 1116 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1117 1118 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1119 1120 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1121 1122 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1123 1124 CLONE_KEYWORDS = {"CLONE", "COPY"} 1125 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1126 1127 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1128 1129 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1130 1131 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1132 1133 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING",
"VIEW_METADATA"} 1134 1135 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1136 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1137 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1138 1139 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1140 1141 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1142 1143 ADD_CONSTRAINT_TOKENS = { 1144 TokenType.CONSTRAINT, 1145 TokenType.FOREIGN_KEY, 1146 TokenType.INDEX, 1147 TokenType.KEY, 1148 TokenType.PRIMARY_KEY, 1149 TokenType.UNIQUE, 1150 } 1151 1152 DISTINCT_TOKENS = {TokenType.DISTINCT} 1153 1154 NULL_TOKENS = {TokenType.NULL} 1155 1156 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1157 1158 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1159 1160 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1161 1162 STRICT_CAST = True 1163 1164 PREFIXED_PIVOT_COLUMNS = False 1165 IDENTIFY_PIVOT_STRINGS = False 1166 1167 LOG_DEFAULTS_TO_LN = False 1168 1169 # Whether ADD is present for each column added by ALTER TABLE 1170 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1171 1172 # Whether the table sample clause expects CSV syntax 1173 TABLESAMPLE_CSV = False 1174 1175 # The default method used for table sampling 1176 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1177 1178 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1179 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1180 1181 # Whether the TRIM function expects the characters to trim as its first argument 1182 TRIM_PATTERN_FIRST = False 1183 1184 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1185 STRING_ALIASES = False 1186 1187 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1188 MODIFIERS_ATTACHED_TO_UNION = True 1189 UNION_MODIFIERS = {"order", "limit", "offset"} 1190 1191 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1192 NO_PAREN_IF_COMMANDS = True 1193 1194 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1195 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1196 1197 # Whether the `:` operator is used to extract a value from a JSON document 1198 COLON_IS_JSON_EXTRACT = False 1199 1200 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1201 # If this is True and '(' is not found, the keyword will be treated as an identifier 1202 VALUES_FOLLOWED_BY_PAREN = True 1203 1204 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1205 SUPPORTS_IMPLICIT_UNNEST = False 1206 1207 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1208 INTERVAL_SPANS = True 1209 1210 # Whether a PARTITION clause can follow a table reference 1211 SUPPORTS_PARTITION_SELECTION = False 1212 1213 __slots__ = ( 1214 "error_level", 1215 "error_message_context", 1216 "max_errors", 1217 "dialect", 1218 "sql", 1219 "errors", 1220 "_tokens", 1221 "_index", 1222 "_curr", 1223 "_next", 1224 "_prev", 1225 "_prev_comments", 1226 ) 1227 1228 # Autofilled 1229 SHOW_TRIE: t.Dict = {} 1230 SET_TRIE: t.Dict = {} 1231 1232 def __init__( 1233 self, 1234 error_level: t.Optional[ErrorLevel] = None, 1235 error_message_context: int = 100, 1236 max_errors: int = 3, 1237 dialect: DialectType = None, 1238 ): 1239 from sqlglot.dialects import Dialect 1240 1241 self.error_level = error_level or ErrorLevel.IMMEDIATE 1242 self.error_message_context = error_message_context 1243 self.max_errors = max_errors 1244 self.dialect = Dialect.get_or_raise(dialect) 1245 self.reset() 1246 1247 def reset(self): 1248 self.sql = "" 1249 self.errors = [] 1250 self._tokens = [] 1251 self._index = 0 1252 self._curr = None 1253 self._next = None 1254 self._prev = None 1255 self._prev_comments = None 1256 1257 def parse( 1258 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1259 ) -> t.List[t.Optional[exp.Expression]]: 1260 """ 1261 Parses a list of tokens and returns a list of syntax trees, one tree 1262 per parsed SQL statement. 1263 1264 Args: 1265 raw_tokens: The list of tokens. 1266 sql: The original SQL string, used to produce helpful debug messages. 1267 1268 Returns: 1269 The list of the produced syntax trees. 1270 """ 1271 return self._parse( 1272 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1273 ) 1274 1275 def parse_into( 1276 self, 1277 expression_types: exp.IntoType, 1278 raw_tokens: t.List[Token], 1279 sql: t.Optional[str] = None, 1280 ) -> t.List[t.Optional[exp.Expression]]: 1281 """ 1282 Parses a list of tokens into a given Expression type. If a collection of Expression 1283 types is given instead, this method will try to parse the token list into each one 1284 of them, stopping at the first for which the parsing succeeds. 1285 1286 Args: 1287 expression_types: The expression type(s) to try and parse the token list into. 1288 raw_tokens: The list of tokens. 1289 sql: The original SQL string, used to produce helpful debug messages. 1290 1291 Returns: 1292 The target Expression. 
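Example:
    A minimal sketch (assuming the default dialect; `Dialect.tokenize` and `Dialect.parser` are used here only to produce the inputs):

    >>> from sqlglot import exp
    >>> from sqlglot.dialects import Dialect
    >>> dialect = Dialect.get_or_raise(None)
    >>> sql = "x > 1"
    >>> expr = dialect.parser().parse_into(exp.Condition, dialect.tokenize(sql), sql)[0]
    >>> isinstance(expr, exp.GT)
    True
    >>> expr.sql()
    'x > 1'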
1293 """ 1294 errors = [] 1295 for expression_type in ensure_list(expression_types): 1296 parser = self.EXPRESSION_PARSERS.get(expression_type) 1297 if not parser: 1298 raise TypeError(f"No parser registered for {expression_type}") 1299 1300 try: 1301 return self._parse(parser, raw_tokens, sql) 1302 except ParseError as e: 1303 e.errors[0]["into_expression"] = expression_type 1304 errors.append(e) 1305 1306 raise ParseError( 1307 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1308 errors=merge_errors(errors), 1309 ) from errors[-1] 1310 1311 def _parse( 1312 self, 1313 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1314 raw_tokens: t.List[Token], 1315 sql: t.Optional[str] = None, 1316 ) -> t.List[t.Optional[exp.Expression]]: 1317 self.reset() 1318 self.sql = sql or "" 1319 1320 total = len(raw_tokens) 1321 chunks: t.List[t.List[Token]] = [[]] 1322 1323 for i, token in enumerate(raw_tokens): 1324 if token.token_type == TokenType.SEMICOLON: 1325 if token.comments: 1326 chunks.append([token]) 1327 1328 if i < total - 1: 1329 chunks.append([]) 1330 else: 1331 chunks[-1].append(token) 1332 1333 expressions = [] 1334 1335 for tokens in chunks: 1336 self._index = -1 1337 self._tokens = tokens 1338 self._advance() 1339 1340 expressions.append(parse_method(self)) 1341 1342 if self._index < len(self._tokens): 1343 self.raise_error("Invalid expression / Unexpected token") 1344 1345 self.check_errors() 1346 1347 return expressions 1348 1349 def check_errors(self) -> None: 1350 """Logs or raises any found errors, depending on the chosen error level setting.""" 1351 if self.error_level == ErrorLevel.WARN: 1352 for error in self.errors: 1353 logger.error(str(error)) 1354 elif self.error_level == ErrorLevel.RAISE and self.errors: 1355 raise ParseError( 1356 concat_messages(self.errors, self.max_errors), 1357 errors=merge_errors(self.errors), 1358 ) 1359 1360 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1361 """ 1362 Appends an error in the list of recorded errors or raises it, depending on the chosen 1363 error level setting. 1364 """ 1365 token = token or self._curr or self._prev or Token.string("") 1366 start = token.start 1367 end = token.end + 1 1368 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1369 highlight = self.sql[start:end] 1370 end_context = self.sql[end : end + self.error_message_context] 1371 1372 error = ParseError.new( 1373 f"{message}. Line {token.line}, Col: {token.col}.\n" 1374 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1375 description=message, 1376 line=token.line, 1377 col=token.col, 1378 start_context=start_context, 1379 highlight=highlight, 1380 end_context=end_context, 1381 ) 1382 1383 if self.error_level == ErrorLevel.IMMEDIATE: 1384 raise error 1385 1386 self.errors.append(error) 1387 1388 def expression( 1389 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1390 ) -> E: 1391 """ 1392 Creates a new, validated Expression. 1393 1394 Args: 1395 exp_class: The expression class to instantiate. 1396 comments: An optional list of comments to attach to the expression. 1397 kwargs: The arguments to set for the expression along with their respective values. 1398 1399 Returns: 1400 The target expression. 
1401 """ 1402 instance = exp_class(**kwargs) 1403 instance.add_comments(comments) if comments else self._add_comments(instance) 1404 return self.validate_expression(instance) 1405 1406 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1407 if expression and self._prev_comments: 1408 expression.add_comments(self._prev_comments) 1409 self._prev_comments = None 1410 1411 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1412 """ 1413 Validates an Expression, making sure that all its mandatory arguments are set. 1414 1415 Args: 1416 expression: The expression to validate. 1417 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1418 1419 Returns: 1420 The validated expression. 1421 """ 1422 if self.error_level != ErrorLevel.IGNORE: 1423 for error_message in expression.error_messages(args): 1424 self.raise_error(error_message) 1425 1426 return expression 1427 1428 def _find_sql(self, start: Token, end: Token) -> str: 1429 return self.sql[start.start : end.end + 1] 1430 1431 def _is_connected(self) -> bool: 1432 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1433 1434 def _advance(self, times: int = 1) -> None: 1435 self._index += times 1436 self._curr = seq_get(self._tokens, self._index) 1437 self._next = seq_get(self._tokens, self._index + 1) 1438 1439 if self._index > 0: 1440 self._prev = self._tokens[self._index - 1] 1441 self._prev_comments = self._prev.comments 1442 else: 1443 self._prev = None 1444 self._prev_comments = None 1445 1446 def _retreat(self, index: int) -> None: 1447 if index != self._index: 1448 self._advance(index - self._index) 1449 1450 def _warn_unsupported(self) -> None: 1451 if len(self._tokens) <= 1: 1452 return 1453 1454 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1455 # interested in emitting a warning for the one being currently processed. 1456 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1457 1458 logger.warning( 1459 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1460 ) 1461 1462 def _parse_command(self) -> exp.Command: 1463 self._warn_unsupported() 1464 return self.expression( 1465 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1466 ) 1467 1468 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1469 """ 1470 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1471 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1472 the parser state accordingly. 1473 """ 1474 index = self._index 1475 error_level = self.error_level 1476 1477 self.error_level = ErrorLevel.IMMEDIATE 1478 try: 1479 this = parse_method() 1480 except ParseError: 1481 this = None 1482 finally: 1483 if not this or retreat: 1484 self._retreat(index) 1485 self.error_level = error_level 1486 1487 return this 1488 1489 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1490 start = self._prev 1491 exists = self._parse_exists() if allow_exists else None 1492 1493 self._match(TokenType.ON) 1494 1495 materialized = self._match_text_seq("MATERIALIZED") 1496 kind = self._match_set(self.CREATABLES) and self._prev 1497 if not kind: 1498 return self._parse_as_command(start) 1499 1500 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1501 this = self._parse_user_defined_function(kind=kind.token_type) 1502 elif kind.token_type == TokenType.TABLE: 1503 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1504 elif kind.token_type == TokenType.COLUMN: 1505 this = self._parse_column() 1506 else: 1507 this = self._parse_id_var() 1508 1509 self._match(TokenType.IS) 1510 1511 return self.expression( 1512 exp.Comment, 1513 this=this, 1514 kind=kind.text, 1515 expression=self._parse_string(), 1516 exists=exists, 1517 materialized=materialized, 1518 ) 1519 1520 def _parse_to_table( 1521 self, 1522 ) -> exp.ToTableProperty: 1523 table = self._parse_table_parts(schema=True) 1524 return self.expression(exp.ToTableProperty, this=table) 1525 1526 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1527 def _parse_ttl(self) -> exp.Expression: 1528 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1529 this = self._parse_bitwise() 1530 1531 if self._match_text_seq("DELETE"): 1532 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1533 if self._match_text_seq("RECOMPRESS"): 1534 return self.expression( 1535 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1536 ) 1537 if self._match_text_seq("TO", "DISK"): 1538 return self.expression( 1539 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1540 ) 1541 if self._match_text_seq("TO", "VOLUME"): 1542 return self.expression( 1543 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1544 ) 1545 1546 return this 1547 1548 expressions = self._parse_csv(_parse_ttl_action) 1549 where = self._parse_where() 1550 group = self._parse_group() 1551 1552 aggregates = None 1553 if group and self._match(TokenType.SET): 1554 aggregates = self._parse_csv(self._parse_set_item) 1555 1556 return self.expression( 1557 exp.MergeTreeTTL, 1558 expressions=expressions, 1559 where=where, 1560 group=group, 1561 aggregates=aggregates, 1562 ) 1563 1564 def _parse_statement(self) -> t.Optional[exp.Expression]: 1565 if self._curr is None: 1566 return None 1567 1568 if self._match_set(self.STATEMENT_PARSERS): 1569 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1570 1571 if self._match_set(self.dialect.tokenizer.COMMANDS): 1572 return self._parse_command() 1573 1574 expression = self._parse_expression() 1575 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1576 return self._parse_query_modifiers(expression) 1577 1578 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1579 start =
self._prev 1580 temporary = self._match(TokenType.TEMPORARY) 1581 materialized = self._match_text_seq("MATERIALIZED") 1582 1583 kind = self._match_set(self.CREATABLES) and self._prev.text 1584 if not kind: 1585 return self._parse_as_command(start) 1586 1587 if_exists = exists or self._parse_exists() 1588 table = self._parse_table_parts( 1589 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1590 ) 1591 1592 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1593 1594 if self._match(TokenType.L_PAREN, advance=False): 1595 expressions = self._parse_wrapped_csv(self._parse_types) 1596 else: 1597 expressions = None 1598 1599 return self.expression( 1600 exp.Drop, 1601 comments=start.comments, 1602 exists=if_exists, 1603 this=table, 1604 expressions=expressions, 1605 kind=kind.upper(), 1606 temporary=temporary, 1607 materialized=materialized, 1608 cascade=self._match_text_seq("CASCADE"), 1609 constraints=self._match_text_seq("CONSTRAINTS"), 1610 purge=self._match_text_seq("PURGE"), 1611 cluster=cluster, 1612 ) 1613 1614 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1615 return ( 1616 self._match_text_seq("IF") 1617 and (not not_ or self._match(TokenType.NOT)) 1618 and self._match(TokenType.EXISTS) 1619 ) 1620 1621 def _parse_create(self) -> exp.Create | exp.Command: 1622 # Note: this can't be None because we've matched a statement parser 1623 start = self._prev 1624 comments = self._prev_comments 1625 1626 replace = ( 1627 start.token_type == TokenType.REPLACE 1628 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1629 or self._match_pair(TokenType.OR, TokenType.ALTER) 1630 ) 1631 1632 unique = self._match(TokenType.UNIQUE) 1633 1634 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1635 self._advance() 1636 1637 properties = None 1638 create_token = self._match_set(self.CREATABLES) and self._prev 1639 1640 if not create_token: 1641 # exp.Properties.Location.POST_CREATE 1642 properties = self._parse_properties() 1643 create_token = self._match_set(self.CREATABLES) and self._prev 1644 1645 if not properties or not create_token: 1646 return self._parse_as_command(start) 1647 1648 exists = self._parse_exists(not_=True) 1649 this = None 1650 expression: t.Optional[exp.Expression] = None 1651 indexes = None 1652 no_schema_binding = None 1653 begin = None 1654 end = None 1655 clone = None 1656 1657 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1658 nonlocal properties 1659 if properties and temp_props: 1660 properties.expressions.extend(temp_props.expressions) 1661 elif temp_props: 1662 properties = temp_props 1663 1664 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1665 this = self._parse_user_defined_function(kind=create_token.token_type) 1666 1667 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1668 extend_props(self._parse_properties()) 1669 1670 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1671 extend_props(self._parse_properties()) 1672 1673 if not expression: 1674 if self._match(TokenType.COMMAND): 1675 expression = self._parse_as_command(self._prev) 1676 else: 1677 begin = self._match(TokenType.BEGIN) 1678 return_ = self._match_text_seq("RETURN") 1679 1680 if self._match(TokenType.STRING, advance=False): 1681 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1682 # # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1683 expression = self._parse_string() 1684 extend_props(self._parse_properties()) 1685 else: 1686 expression = self._parse_statement() 1687 1688 end = self._match_text_seq("END") 1689 1690 if return_: 1691 expression = self.expression(exp.Return, this=expression) 1692 elif create_token.token_type == TokenType.INDEX: 1693 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1694 if not self._match(TokenType.ON): 1695 index = self._parse_id_var() 1696 anonymous = False 1697 else: 1698 index = None 1699 anonymous = True 1700 1701 this = self._parse_index(index=index, anonymous=anonymous) 1702 elif create_token.token_type in self.DB_CREATABLES: 1703 table_parts = self._parse_table_parts( 1704 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1705 ) 1706 1707 # exp.Properties.Location.POST_NAME 1708 self._match(TokenType.COMMA) 1709 extend_props(self._parse_properties(before=True)) 1710 1711 this = self._parse_schema(this=table_parts) 1712 1713 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1714 extend_props(self._parse_properties()) 1715 1716 self._match(TokenType.ALIAS) 1717 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1718 # exp.Properties.Location.POST_ALIAS 1719 extend_props(self._parse_properties()) 1720 1721 if create_token.token_type == TokenType.SEQUENCE: 1722 expression = self._parse_types() 1723 extend_props(self._parse_properties()) 1724 else: 1725 expression = self._parse_ddl_select() 1726 1727 if create_token.token_type == TokenType.TABLE: 1728 # exp.Properties.Location.POST_EXPRESSION 1729 extend_props(self._parse_properties()) 1730 1731 indexes = [] 1732 while True: 1733 index = self._parse_index() 1734 1735 # exp.Properties.Location.POST_INDEX 1736 extend_props(self._parse_properties()) 1737 1738 if not index: 1739 break 1740 else: 1741 self._match(TokenType.COMMA) 1742 indexes.append(index) 1743 elif create_token.token_type == TokenType.VIEW: 1744 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1745 no_schema_binding = True 1746 1747 shallow = self._match_text_seq("SHALLOW") 1748 1749 if self._match_texts(self.CLONE_KEYWORDS): 1750 copy = self._prev.text.lower() == "copy" 1751 clone = self.expression( 1752 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1753 ) 1754 1755 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1756 return self._parse_as_command(start) 1757 1758 return self.expression( 1759 exp.Create, 1760 comments=comments, 1761 this=this, 1762 kind=create_token.text.upper(), 1763 replace=replace, 1764 unique=unique, 1765 expression=expression, 1766 exists=exists, 1767 properties=properties, 1768 indexes=indexes, 1769 no_schema_binding=no_schema_binding, 1770 begin=begin, 1771 end=end, 1772 clone=clone, 1773 ) 1774 1775 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1776 seq = exp.SequenceProperties() 1777 1778 options = [] 1779 index = self._index 1780 1781 while self._curr: 1782 self._match(TokenType.COMMA) 1783 if self._match_text_seq("INCREMENT"): 1784 self._match_text_seq("BY") 1785 self._match_text_seq("=") 1786 seq.set("increment", self._parse_term()) 1787 elif self._match_text_seq("MINVALUE"): 1788 seq.set("minvalue", self._parse_term()) 1789 elif self._match_text_seq("MAXVALUE"): 1790 seq.set("maxvalue", self._parse_term()) 1791 elif self._match(TokenType.START_WITH) or
self._match_text_seq("START"): 1792 self._match_text_seq("=") 1793 seq.set("start", self._parse_term()) 1794 elif self._match_text_seq("CACHE"): 1795 # T-SQL allows empty CACHE which is initialized dynamically 1796 seq.set("cache", self._parse_number() or True) 1797 elif self._match_text_seq("OWNED", "BY"): 1798 # "OWNED BY NONE" is the default 1799 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1800 else: 1801 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1802 if opt: 1803 options.append(opt) 1804 else: 1805 break 1806 1807 seq.set("options", options if options else None) 1808 return None if self._index == index else seq 1809 1810 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1811 # only used for teradata currently 1812 self._match(TokenType.COMMA) 1813 1814 kwargs = { 1815 "no": self._match_text_seq("NO"), 1816 "dual": self._match_text_seq("DUAL"), 1817 "before": self._match_text_seq("BEFORE"), 1818 "default": self._match_text_seq("DEFAULT"), 1819 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1820 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1821 "after": self._match_text_seq("AFTER"), 1822 "minimum": self._match_texts(("MIN", "MINIMUM")), 1823 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1824 } 1825 1826 if self._match_texts(self.PROPERTY_PARSERS): 1827 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1828 try: 1829 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1830 except TypeError: 1831 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1832 1833 return None 1834 1835 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1836 return self._parse_wrapped_csv(self._parse_property) 1837 1838 def _parse_property(self) -> t.Optional[exp.Expression]: 1839 if self._match_texts(self.PROPERTY_PARSERS): 1840 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1841 1842 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1843 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1844 1845 if self._match_text_seq("COMPOUND", "SORTKEY"): 1846 return self._parse_sortkey(compound=True) 1847 1848 if self._match_text_seq("SQL", "SECURITY"): 1849 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1850 1851 index = self._index 1852 key = self._parse_column() 1853 1854 if not self._match(TokenType.EQ): 1855 self._retreat(index) 1856 return self._parse_sequence_properties() 1857 1858 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1859 if isinstance(key, exp.Column): 1860 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1861 1862 value = self._parse_bitwise() or self._parse_var(any_token=True) 1863 1864 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1865 if isinstance(value, exp.Column): 1866 value = exp.var(value.name) 1867 1868 return self.expression(exp.Property, this=key, value=value) 1869 1870 def _parse_stored(self) -> exp.FileFormatProperty: 1871 self._match(TokenType.ALIAS) 1872 1873 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1874 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1875 1876 return self.expression( 1877 exp.FileFormatProperty, 1878 this=( 1879 self.expression( 1880 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1881 ) 1882 if 
input_format or output_format 1883 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1884 ), 1885 ) 1886 1887 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1888 field = self._parse_field() 1889 if isinstance(field, exp.Identifier) and not field.quoted: 1890 field = exp.var(field) 1891 1892 return field 1893 1894 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1895 self._match(TokenType.EQ) 1896 self._match(TokenType.ALIAS) 1897 1898 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1899 1900 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1901 properties = [] 1902 while True: 1903 if before: 1904 prop = self._parse_property_before() 1905 else: 1906 prop = self._parse_property() 1907 if not prop: 1908 break 1909 for p in ensure_list(prop): 1910 properties.append(p) 1911 1912 if properties: 1913 return self.expression(exp.Properties, expressions=properties) 1914 1915 return None 1916 1917 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1918 return self.expression( 1919 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1920 ) 1921 1922 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1923 if self._index >= 2: 1924 pre_volatile_token = self._tokens[self._index - 2] 1925 else: 1926 pre_volatile_token = None 1927 1928 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1929 return exp.VolatileProperty() 1930 1931 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1932 1933 def _parse_retention_period(self) -> exp.Var: 1934 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1935 number = self._parse_number() 1936 number_str = f"{number} " if number else "" 1937 unit = self._parse_var(any_token=True) 1938 return exp.var(f"{number_str}{unit}") 1939 1940 def _parse_system_versioning_property( 1941 self, with_: bool = False 1942 ) -> exp.WithSystemVersioningProperty: 1943 self._match(TokenType.EQ) 1944 prop = self.expression( 1945 exp.WithSystemVersioningProperty, 1946 **{ # type: ignore 1947 "on": True, 1948 "with": with_, 1949 }, 1950 ) 1951 1952 if self._match_text_seq("OFF"): 1953 prop.set("on", False) 1954 return prop 1955 1956 self._match(TokenType.ON) 1957 if self._match(TokenType.L_PAREN): 1958 while self._curr and not self._match(TokenType.R_PAREN): 1959 if self._match_text_seq("HISTORY_TABLE", "="): 1960 prop.set("this", self._parse_table_parts()) 1961 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1962 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1963 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1964 prop.set("retention_period", self._parse_retention_period()) 1965 1966 self._match(TokenType.COMMA) 1967 1968 return prop 1969 1970 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1971 self._match(TokenType.EQ) 1972 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1973 prop = self.expression(exp.DataDeletionProperty, on=on) 1974 1975 if self._match(TokenType.L_PAREN): 1976 while self._curr and not self._match(TokenType.R_PAREN): 1977 if self._match_text_seq("FILTER_COLUMN", "="): 1978 prop.set("filter_column", self._parse_column()) 1979 elif self._match_text_seq("RETENTION_PERIOD", "="): 1980 prop.set("retention_period", self._parse_retention_period()) 1981 1982 
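# The parenthesized options are comma-separated; consume the delimiter, if any, before the next key is matched.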
self._match(TokenType.COMMA) 1983 1984 return prop 1985 1986 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1987 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1988 prop = self._parse_system_versioning_property(with_=True) 1989 self._match_r_paren() 1990 return prop 1991 1992 if self._match(TokenType.L_PAREN, advance=False): 1993 return self._parse_wrapped_properties() 1994 1995 if self._match_text_seq("JOURNAL"): 1996 return self._parse_withjournaltable() 1997 1998 if self._match_texts(self.VIEW_ATTRIBUTES): 1999 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2000 2001 if self._match_text_seq("DATA"): 2002 return self._parse_withdata(no=False) 2003 elif self._match_text_seq("NO", "DATA"): 2004 return self._parse_withdata(no=True) 2005 2006 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2007 return self._parse_serde_properties(with_=True) 2008 2009 if not self._next: 2010 return None 2011 2012 return self._parse_withisolatedloading() 2013 2014 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2015 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2016 self._match(TokenType.EQ) 2017 2018 user = self._parse_id_var() 2019 self._match(TokenType.PARAMETER) 2020 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2021 2022 if not user or not host: 2023 return None 2024 2025 return exp.DefinerProperty(this=f"{user}@{host}") 2026 2027 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2028 self._match(TokenType.TABLE) 2029 self._match(TokenType.EQ) 2030 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2031 2032 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2033 return self.expression(exp.LogProperty, no=no) 2034 2035 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2036 return self.expression(exp.JournalProperty, **kwargs) 2037 2038 def _parse_checksum(self) -> exp.ChecksumProperty: 2039 self._match(TokenType.EQ) 2040 2041 on = None 2042 if self._match(TokenType.ON): 2043 on = True 2044 elif self._match_text_seq("OFF"): 2045 on = False 2046 2047 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2048 2049 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2050 return self.expression( 2051 exp.Cluster, 2052 expressions=( 2053 self._parse_wrapped_csv(self._parse_ordered) 2054 if wrapped 2055 else self._parse_csv(self._parse_ordered) 2056 ), 2057 ) 2058 2059 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2060 self._match_text_seq("BY") 2061 2062 self._match_l_paren() 2063 expressions = self._parse_csv(self._parse_column) 2064 self._match_r_paren() 2065 2066 if self._match_text_seq("SORTED", "BY"): 2067 self._match_l_paren() 2068 sorted_by = self._parse_csv(self._parse_ordered) 2069 self._match_r_paren() 2070 else: 2071 sorted_by = None 2072 2073 self._match(TokenType.INTO) 2074 buckets = self._parse_number() 2075 self._match_text_seq("BUCKETS") 2076 2077 return self.expression( 2078 exp.ClusteredByProperty, 2079 expressions=expressions, 2080 sorted_by=sorted_by, 2081 buckets=buckets, 2082 ) 2083 2084 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2085 if not self._match_text_seq("GRANTS"): 2086 self._retreat(self._index - 1) 2087 return None 2088 2089 return self.expression(exp.CopyGrantsProperty) 2090 2091 def _parse_freespace(self) -> exp.FreespaceProperty: 2092 self._match(TokenType.EQ) 2093 return 
self.expression( 2094 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2095 ) 2096 2097 def _parse_mergeblockratio( 2098 self, no: bool = False, default: bool = False 2099 ) -> exp.MergeBlockRatioProperty: 2100 if self._match(TokenType.EQ): 2101 return self.expression( 2102 exp.MergeBlockRatioProperty, 2103 this=self._parse_number(), 2104 percent=self._match(TokenType.PERCENT), 2105 ) 2106 2107 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2108 2109 def _parse_datablocksize( 2110 self, 2111 default: t.Optional[bool] = None, 2112 minimum: t.Optional[bool] = None, 2113 maximum: t.Optional[bool] = None, 2114 ) -> exp.DataBlocksizeProperty: 2115 self._match(TokenType.EQ) 2116 size = self._parse_number() 2117 2118 units = None 2119 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2120 units = self._prev.text 2121 2122 return self.expression( 2123 exp.DataBlocksizeProperty, 2124 size=size, 2125 units=units, 2126 default=default, 2127 minimum=minimum, 2128 maximum=maximum, 2129 ) 2130 2131 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2132 self._match(TokenType.EQ) 2133 always = self._match_text_seq("ALWAYS") 2134 manual = self._match_text_seq("MANUAL") 2135 never = self._match_text_seq("NEVER") 2136 default = self._match_text_seq("DEFAULT") 2137 2138 autotemp = None 2139 if self._match_text_seq("AUTOTEMP"): 2140 autotemp = self._parse_schema() 2141 2142 return self.expression( 2143 exp.BlockCompressionProperty, 2144 always=always, 2145 manual=manual, 2146 never=never, 2147 default=default, 2148 autotemp=autotemp, 2149 ) 2150 2151 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2152 index = self._index 2153 no = self._match_text_seq("NO") 2154 concurrent = self._match_text_seq("CONCURRENT") 2155 2156 if not self._match_text_seq("ISOLATED", "LOADING"): 2157 self._retreat(index) 2158 return None 2159 2160 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2161 return self.expression( 2162 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2163 ) 2164 2165 def _parse_locking(self) -> exp.LockingProperty: 2166 if self._match(TokenType.TABLE): 2167 kind = "TABLE" 2168 elif self._match(TokenType.VIEW): 2169 kind = "VIEW" 2170 elif self._match(TokenType.ROW): 2171 kind = "ROW" 2172 elif self._match_text_seq("DATABASE"): 2173 kind = "DATABASE" 2174 else: 2175 kind = None 2176 2177 if kind in ("DATABASE", "TABLE", "VIEW"): 2178 this = self._parse_table_parts() 2179 else: 2180 this = None 2181 2182 if self._match(TokenType.FOR): 2183 for_or_in = "FOR" 2184 elif self._match(TokenType.IN): 2185 for_or_in = "IN" 2186 else: 2187 for_or_in = None 2188 2189 if self._match_text_seq("ACCESS"): 2190 lock_type = "ACCESS" 2191 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2192 lock_type = "EXCLUSIVE" 2193 elif self._match_text_seq("SHARE"): 2194 lock_type = "SHARE" 2195 elif self._match_text_seq("READ"): 2196 lock_type = "READ" 2197 elif self._match_text_seq("WRITE"): 2198 lock_type = "WRITE" 2199 elif self._match_text_seq("CHECKSUM"): 2200 lock_type = "CHECKSUM" 2201 else: 2202 lock_type = None 2203 2204 override = self._match_text_seq("OVERRIDE") 2205 2206 return self.expression( 2207 exp.LockingProperty, 2208 this=this, 2209 kind=kind, 2210 for_or_in=for_or_in, 2211 lock_type=lock_type, 2212 override=override, 2213 ) 2214 2215 def _parse_partition_by(self) -> t.List[exp.Expression]: 2216 if 
self._match(TokenType.PARTITION_BY): 2217 return self._parse_csv(self._parse_assignment) 2218 return [] 2219 2220 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2221 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2222 if self._match_text_seq("MINVALUE"): 2223 return exp.var("MINVALUE") 2224 if self._match_text_seq("MAXVALUE"): 2225 return exp.var("MAXVALUE") 2226 return self._parse_bitwise() 2227 2228 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2229 expression = None 2230 from_expressions = None 2231 to_expressions = None 2232 2233 if self._match(TokenType.IN): 2234 this = self._parse_wrapped_csv(self._parse_bitwise) 2235 elif self._match(TokenType.FROM): 2236 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2237 self._match_text_seq("TO") 2238 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2239 elif self._match_text_seq("WITH", "(", "MODULUS"): 2240 this = self._parse_number() 2241 self._match_text_seq(",", "REMAINDER") 2242 expression = self._parse_number() 2243 self._match_r_paren() 2244 else: 2245 self.raise_error("Failed to parse partition bound spec.") 2246 2247 return self.expression( 2248 exp.PartitionBoundSpec, 2249 this=this, 2250 expression=expression, 2251 from_expressions=from_expressions, 2252 to_expressions=to_expressions, 2253 ) 2254 2255 # https://www.postgresql.org/docs/current/sql-createtable.html 2256 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2257 if not self._match_text_seq("OF"): 2258 self._retreat(self._index - 1) 2259 return None 2260 2261 this = self._parse_table(schema=True) 2262 2263 if self._match(TokenType.DEFAULT): 2264 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2265 elif self._match_text_seq("FOR", "VALUES"): 2266 expression = self._parse_partition_bound_spec() 2267 else: 2268 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2269 2270 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2271 2272 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2273 self._match(TokenType.EQ) 2274 return self.expression( 2275 exp.PartitionedByProperty, 2276 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2277 ) 2278 2279 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2280 if self._match_text_seq("AND", "STATISTICS"): 2281 statistics = True 2282 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2283 statistics = False 2284 else: 2285 statistics = None 2286 2287 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2288 2289 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2290 if self._match_text_seq("SQL"): 2291 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2292 return None 2293 2294 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2295 if self._match_text_seq("SQL", "DATA"): 2296 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2297 return None 2298 2299 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2300 if self._match_text_seq("PRIMARY", "INDEX"): 2301 return exp.NoPrimaryIndexProperty() 2302 if self._match_text_seq("SQL"): 2303 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2304 return None 2305 2306 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2307 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2308 return exp.OnCommitProperty() 2309 
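# ON COMMIT DELETE ROWS: standard SQL counterpart of PRESERVE ROWS above, where a temporary table's rows are cleared at transaction commit.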
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2310 return exp.OnCommitProperty(delete=True) 2311 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2312 2313 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2314 if self._match_text_seq("SQL", "DATA"): 2315 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2316 return None 2317 2318 def _parse_distkey(self) -> exp.DistKeyProperty: 2319 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2320 2321 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2322 table = self._parse_table(schema=True) 2323 2324 options = [] 2325 while self._match_texts(("INCLUDING", "EXCLUDING")): 2326 this = self._prev.text.upper() 2327 2328 id_var = self._parse_id_var() 2329 if not id_var: 2330 return None 2331 2332 options.append( 2333 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2334 ) 2335 2336 return self.expression(exp.LikeProperty, this=table, expressions=options) 2337 2338 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2339 return self.expression( 2340 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2341 ) 2342 2343 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2344 self._match(TokenType.EQ) 2345 return self.expression( 2346 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2347 ) 2348 2349 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2350 self._match_text_seq("WITH", "CONNECTION") 2351 return self.expression( 2352 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2353 ) 2354 2355 def _parse_returns(self) -> exp.ReturnsProperty: 2356 value: t.Optional[exp.Expression] 2357 null = None 2358 is_table = self._match(TokenType.TABLE) 2359 2360 if is_table: 2361 if self._match(TokenType.LT): 2362 value = self.expression( 2363 exp.Schema, 2364 this="TABLE", 2365 expressions=self._parse_csv(self._parse_struct_types), 2366 ) 2367 if not self._match(TokenType.GT): 2368 self.raise_error("Expecting >") 2369 else: 2370 value = self._parse_schema(exp.var("TABLE")) 2371 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2372 null = True 2373 value = None 2374 else: 2375 value = self._parse_types() 2376 2377 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2378 2379 def _parse_describe(self) -> exp.Describe: 2380 kind = self._match_set(self.CREATABLES) and self._prev.text 2381 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2382 if self._match(TokenType.DOT): 2383 style = None 2384 self._retreat(self._index - 2) 2385 this = self._parse_table(schema=True) 2386 properties = self._parse_properties() 2387 expressions = properties.expressions if properties else None 2388 return self.expression( 2389 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2390 ) 2391 2392 def _parse_insert(self) -> exp.Insert: 2393 comments = ensure_list(self._prev_comments) 2394 hint = self._parse_hint() 2395 overwrite = self._match(TokenType.OVERWRITE) 2396 ignore = self._match(TokenType.IGNORE) 2397 local = self._match_text_seq("LOCAL") 2398 alternative = None 2399 is_function = None 2400 2401 if self._match_text_seq("DIRECTORY"): 2402 this: t.Optional[exp.Expression] = self.expression( 2403 exp.Directory, 2404 this=self._parse_var_or_string(), 2405 
local=local, 2406 row_format=self._parse_row_format(match_row=True), 2407 ) 2408 else: 2409 if self._match(TokenType.OR): 2410 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2411 2412 self._match(TokenType.INTO) 2413 comments += ensure_list(self._prev_comments) 2414 self._match(TokenType.TABLE) 2415 is_function = self._match(TokenType.FUNCTION) 2416 2417 this = ( 2418 self._parse_table(schema=True, parse_partition=True) 2419 if not is_function 2420 else self._parse_function() 2421 ) 2422 2423 returning = self._parse_returning() 2424 2425 return self.expression( 2426 exp.Insert, 2427 comments=comments, 2428 hint=hint, 2429 is_function=is_function, 2430 this=this, 2431 stored=self._match_text_seq("STORED") and self._parse_stored(), 2432 by_name=self._match_text_seq("BY", "NAME"), 2433 exists=self._parse_exists(), 2434 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2435 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2436 conflict=self._parse_on_conflict(), 2437 returning=returning or self._parse_returning(), 2438 overwrite=overwrite, 2439 alternative=alternative, 2440 ignore=ignore, 2441 ) 2442 2443 def _parse_kill(self) -> exp.Kill: 2444 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2445 2446 return self.expression( 2447 exp.Kill, 2448 this=self._parse_primary(), 2449 kind=kind, 2450 ) 2451 2452 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2453 conflict = self._match_text_seq("ON", "CONFLICT") 2454 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2455 2456 if not conflict and not duplicate: 2457 return None 2458 2459 conflict_keys = None 2460 constraint = None 2461 2462 if conflict: 2463 if self._match_text_seq("ON", "CONSTRAINT"): 2464 constraint = self._parse_id_var() 2465 elif self._match(TokenType.L_PAREN): 2466 conflict_keys = self._parse_csv(self._parse_id_var) 2467 self._match_r_paren() 2468 2469 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2470 if self._prev.token_type == TokenType.UPDATE: 2471 self._match(TokenType.SET) 2472 expressions = self._parse_csv(self._parse_equality) 2473 else: 2474 expressions = None 2475 2476 return self.expression( 2477 exp.OnConflict, 2478 duplicate=duplicate, 2479 expressions=expressions, 2480 action=action, 2481 conflict_keys=conflict_keys, 2482 constraint=constraint, 2483 ) 2484 2485 def _parse_returning(self) -> t.Optional[exp.Returning]: 2486 if not self._match(TokenType.RETURNING): 2487 return None 2488 return self.expression( 2489 exp.Returning, 2490 expressions=self._parse_csv(self._parse_expression), 2491 into=self._match(TokenType.INTO) and self._parse_table_part(), 2492 ) 2493 2494 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2495 if not self._match(TokenType.FORMAT): 2496 return None 2497 return self._parse_row_format() 2498 2499 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2500 index = self._index 2501 with_ = with_ or self._match_text_seq("WITH") 2502 2503 if not self._match(TokenType.SERDE_PROPERTIES): 2504 self._retreat(index) 2505 return None 2506 return self.expression( 2507 exp.SerdeProperties, 2508 **{ # type: ignore 2509 "expressions": self._parse_wrapped_properties(), 2510 "with": with_, 2511 }, 2512 ) 2513 2514 def _parse_row_format( 2515 self, match_row: bool = False 2516 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2517 
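# Handles Hive-style ROW FORMAT clauses, e.g. (illustrative examples, not an exhaustive grammar):
#   ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
#   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'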
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2518 return None 2519 2520 if self._match_text_seq("SERDE"): 2521 this = self._parse_string() 2522 2523 serde_properties = self._parse_serde_properties() 2524 2525 return self.expression( 2526 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2527 ) 2528 2529 self._match_text_seq("DELIMITED") 2530 2531 kwargs = {} 2532 2533 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2534 kwargs["fields"] = self._parse_string() 2535 if self._match_text_seq("ESCAPED", "BY"): 2536 kwargs["escaped"] = self._parse_string() 2537 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2538 kwargs["collection_items"] = self._parse_string() 2539 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2540 kwargs["map_keys"] = self._parse_string() 2541 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2542 kwargs["lines"] = self._parse_string() 2543 if self._match_text_seq("NULL", "DEFINED", "AS"): 2544 kwargs["null"] = self._parse_string() 2545 2546 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2547 2548 def _parse_load(self) -> exp.LoadData | exp.Command: 2549 if self._match_text_seq("DATA"): 2550 local = self._match_text_seq("LOCAL") 2551 self._match_text_seq("INPATH") 2552 inpath = self._parse_string() 2553 overwrite = self._match(TokenType.OVERWRITE) 2554 self._match_pair(TokenType.INTO, TokenType.TABLE) 2555 2556 return self.expression( 2557 exp.LoadData, 2558 this=self._parse_table(schema=True), 2559 local=local, 2560 overwrite=overwrite, 2561 inpath=inpath, 2562 partition=self._parse_partition(), 2563 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2564 serde=self._match_text_seq("SERDE") and self._parse_string(), 2565 ) 2566 return self._parse_as_command(self._prev) 2567 2568 def _parse_delete(self) -> exp.Delete: 2569 # This handles MySQL's "Multiple-Table Syntax" 2570 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2571 tables = None 2572 comments = self._prev_comments 2573 if not self._match(TokenType.FROM, advance=False): 2574 tables = self._parse_csv(self._parse_table) or None 2575 2576 returning = self._parse_returning() 2577 2578 return self.expression( 2579 exp.Delete, 2580 comments=comments, 2581 tables=tables, 2582 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2583 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2584 where=self._parse_where(), 2585 returning=returning or self._parse_returning(), 2586 limit=self._parse_limit(), 2587 ) 2588 2589 def _parse_update(self) -> exp.Update: 2590 comments = self._prev_comments 2591 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2592 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2593 returning = self._parse_returning() 2594 return self.expression( 2595 exp.Update, 2596 comments=comments, 2597 **{ # type: ignore 2598 "this": this, 2599 "expressions": expressions, 2600 "from": self._parse_from(joins=True), 2601 "where": self._parse_where(), 2602 "returning": returning or self._parse_returning(), 2603 "order": self._parse_order(), 2604 "limit": self._parse_limit(), 2605 }, 2606 ) 2607 2608 def _parse_uncache(self) -> exp.Uncache: 2609 if not self._match(TokenType.TABLE): 2610 self.raise_error("Expecting TABLE after UNCACHE") 2611 2612 return self.expression( 2613 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2614 ) 2615 2616 def 
_parse_cache(self) -> exp.Cache: 2617 lazy = self._match_text_seq("LAZY") 2618 self._match(TokenType.TABLE) 2619 table = self._parse_table(schema=True) 2620 2621 options = [] 2622 if self._match_text_seq("OPTIONS"): 2623 self._match_l_paren() 2624 k = self._parse_string() 2625 self._match(TokenType.EQ) 2626 v = self._parse_string() 2627 options = [k, v] 2628 self._match_r_paren() 2629 2630 self._match(TokenType.ALIAS) 2631 return self.expression( 2632 exp.Cache, 2633 this=table, 2634 lazy=lazy, 2635 options=options, 2636 expression=self._parse_select(nested=True), 2637 ) 2638 2639 def _parse_partition(self) -> t.Optional[exp.Partition]: 2640 if not self._match(TokenType.PARTITION): 2641 return None 2642 2643 return self.expression( 2644 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2645 ) 2646 2647 def _parse_value(self) -> t.Optional[exp.Tuple]: 2648 if self._match(TokenType.L_PAREN): 2649 expressions = self._parse_csv(self._parse_expression) 2650 self._match_r_paren() 2651 return self.expression(exp.Tuple, expressions=expressions) 2652 2653 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2654 expression = self._parse_expression() 2655 if expression: 2656 return self.expression(exp.Tuple, expressions=[expression]) 2657 return None 2658 2659 def _parse_projections(self) -> t.List[exp.Expression]: 2660 return self._parse_expressions() 2661 2662 def _parse_select( 2663 self, 2664 nested: bool = False, 2665 table: bool = False, 2666 parse_subquery_alias: bool = True, 2667 parse_set_operation: bool = True, 2668 ) -> t.Optional[exp.Expression]: 2669 cte = self._parse_with() 2670 2671 if cte: 2672 this = self._parse_statement() 2673 2674 if not this: 2675 self.raise_error("Failed to parse any statement following CTE") 2676 return cte 2677 2678 if "with" in this.arg_types: 2679 this.set("with", cte) 2680 else: 2681 self.raise_error(f"{this.key} does not support CTE") 2682 this = cte 2683 2684 return this 2685 2686 # duckdb supports leading with FROM x 2687 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2688 2689 if self._match(TokenType.SELECT): 2690 comments = self._prev_comments 2691 2692 hint = self._parse_hint() 2693 all_ = self._match(TokenType.ALL) 2694 distinct = self._match_set(self.DISTINCT_TOKENS) 2695 2696 kind = ( 2697 self._match(TokenType.ALIAS) 2698 and self._match_texts(("STRUCT", "VALUE")) 2699 and self._prev.text.upper() 2700 ) 2701 2702 if distinct: 2703 distinct = self.expression( 2704 exp.Distinct, 2705 on=self._parse_value() if self._match(TokenType.ON) else None, 2706 ) 2707 2708 if all_ and distinct: 2709 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2710 2711 limit = self._parse_limit(top=True) 2712 projections = self._parse_projections() 2713 2714 this = self.expression( 2715 exp.Select, 2716 kind=kind, 2717 hint=hint, 2718 distinct=distinct, 2719 expressions=projections, 2720 limit=limit, 2721 ) 2722 this.comments = comments 2723 2724 into = self._parse_into() 2725 if into: 2726 this.set("into", into) 2727 2728 if not from_: 2729 from_ = self._parse_from() 2730 2731 if from_: 2732 this.set("from", from_) 2733 2734 this = self._parse_query_modifiers(this) 2735 elif (table or nested) and self._match(TokenType.L_PAREN): 2736 if self._match(TokenType.PIVOT): 2737 this = self._parse_simplified_pivot() 2738 elif self._match(TokenType.FROM): 2739 this = exp.select("*").from_( 2740 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2741 ) 2742 else: 2743 
this = ( 2744 self._parse_table() 2745 if table 2746 else self._parse_select(nested=True, parse_set_operation=False) 2747 ) 2748 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2749 2750 self._match_r_paren() 2751 2752 # We return early here so that the UNION isn't attached to the subquery by the 2753 # following call to _parse_set_operations, but instead becomes the parent node 2754 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2755 elif self._match(TokenType.VALUES, advance=False): 2756 this = self._parse_derived_table_values() 2757 elif from_: 2758 this = exp.select("*").from_(from_.this, copy=False) 2759 else: 2760 this = None 2761 2762 if parse_set_operation: 2763 return self._parse_set_operations(this) 2764 return this 2765 2766 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2767 if not skip_with_token and not self._match(TokenType.WITH): 2768 return None 2769 2770 comments = self._prev_comments 2771 recursive = self._match(TokenType.RECURSIVE) 2772 2773 expressions = [] 2774 while True: 2775 expressions.append(self._parse_cte()) 2776 2777 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2778 break 2779 else: 2780 self._match(TokenType.WITH) 2781 2782 return self.expression( 2783 exp.With, comments=comments, expressions=expressions, recursive=recursive 2784 ) 2785 2786 def _parse_cte(self) -> exp.CTE: 2787 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2788 if not alias or not alias.this: 2789 self.raise_error("Expected CTE to have alias") 2790 2791 self._match(TokenType.ALIAS) 2792 2793 if self._match_text_seq("NOT", "MATERIALIZED"): 2794 materialized = False 2795 elif self._match_text_seq("MATERIALIZED"): 2796 materialized = True 2797 else: 2798 materialized = None 2799 2800 return self.expression( 2801 exp.CTE, 2802 this=self._parse_wrapped(self._parse_statement), 2803 alias=alias, 2804 materialized=materialized, 2805 ) 2806 2807 def _parse_table_alias( 2808 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2809 ) -> t.Optional[exp.TableAlias]: 2810 any_token = self._match(TokenType.ALIAS) 2811 alias = ( 2812 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2813 or self._parse_string_as_identifier() 2814 ) 2815 2816 index = self._index 2817 if self._match(TokenType.L_PAREN): 2818 columns = self._parse_csv(self._parse_function_parameter) 2819 self._match_r_paren() if columns else self._retreat(index) 2820 else: 2821 columns = None 2822 2823 if not alias and not columns: 2824 return None 2825 2826 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2827 2828 # We bubble up comments from the Identifier to the TableAlias 2829 if isinstance(alias, exp.Identifier): 2830 table_alias.add_comments(alias.pop_comments()) 2831 2832 return table_alias 2833 2834 def _parse_subquery( 2835 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2836 ) -> t.Optional[exp.Subquery]: 2837 if not this: 2838 return None 2839 2840 return self.expression( 2841 exp.Subquery, 2842 this=this, 2843 pivots=self._parse_pivots(), 2844 alias=self._parse_table_alias() if parse_alias else None, 2845 ) 2846 2847 def _implicit_unnests_to_explicit(self, this: E) -> E: 2848 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2849 2850 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2851 for i, join in enumerate(this.args.get("joins") or []): 2852 table = join.this 2853 
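# Normalize a copy of the join target rather than the node itself; the copy is only used to test whether its first part names a previously seen relation, so the original tree is left intact.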
normalized_table = table.copy() 2854 normalized_table.meta["maybe_column"] = True 2855 normalized_table = _norm(normalized_table, dialect=self.dialect) 2856 2857 if isinstance(table, exp.Table) and not join.args.get("on"): 2858 if normalized_table.parts[0].name in refs: 2859 table_as_column = table.to_column() 2860 unnest = exp.Unnest(expressions=[table_as_column]) 2861 2862 # Table.to_column creates a parent Alias node that we want to convert to 2863 # a TableAlias and attach to the Unnest, so it matches the parser's output 2864 if isinstance(table.args.get("alias"), exp.TableAlias): 2865 table_as_column.replace(table_as_column.this) 2866 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2867 2868 table.replace(unnest) 2869 2870 refs.add(normalized_table.alias_or_name) 2871 2872 return this 2873 2874 def _parse_query_modifiers( 2875 self, this: t.Optional[exp.Expression] 2876 ) -> t.Optional[exp.Expression]: 2877 if isinstance(this, (exp.Query, exp.Table)): 2878 for join in self._parse_joins(): 2879 this.append("joins", join) 2880 for lateral in iter(self._parse_lateral, None): 2881 this.append("laterals", lateral) 2882 2883 while True: 2884 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2885 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2886 key, expression = parser(self) 2887 2888 if expression: 2889 this.set(key, expression) 2890 if key == "limit": 2891 offset = expression.args.pop("offset", None) 2892 2893 if offset: 2894 offset = exp.Offset(expression=offset) 2895 this.set("offset", offset) 2896 2897 limit_by_expressions = expression.expressions 2898 expression.set("expressions", None) 2899 offset.set("expressions", limit_by_expressions) 2900 continue 2901 break 2902 2903 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2904 this = self._implicit_unnests_to_explicit(this) 2905 2906 return this 2907 2908 def _parse_hint(self) -> t.Optional[exp.Hint]: 2909 if self._match(TokenType.HINT): 2910 hints = [] 2911 for hint in iter( 2912 lambda: self._parse_csv( 2913 lambda: self._parse_function() or self._parse_var(upper=True) 2914 ), 2915 [], 2916 ): 2917 hints.extend(hint) 2918 2919 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2920 self.raise_error("Expected */ after HINT") 2921 2922 return self.expression(exp.Hint, expressions=hints) 2923 2924 return None 2925 2926 def _parse_into(self) -> t.Optional[exp.Into]: 2927 if not self._match(TokenType.INTO): 2928 return None 2929 2930 temp = self._match(TokenType.TEMPORARY) 2931 unlogged = self._match_text_seq("UNLOGGED") 2932 self._match(TokenType.TABLE) 2933 2934 return self.expression( 2935 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2936 ) 2937 2938 def _parse_from( 2939 self, joins: bool = False, skip_from_token: bool = False 2940 ) -> t.Optional[exp.From]: 2941 if not skip_from_token and not self._match(TokenType.FROM): 2942 return None 2943 2944 return self.expression( 2945 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2946 ) 2947 2948 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2949 return self.expression( 2950 exp.MatchRecognizeMeasure, 2951 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2952 this=self._parse_expression(), 2953 ) 2954 2955 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2956 if not self._match(TokenType.MATCH_RECOGNIZE): 2957 return None 2958 2959 self._match_l_paren() 2960 2961 partition = 
self._parse_partition_by() 2962 order = self._parse_order() 2963 2964 measures = ( 2965 self._parse_csv(self._parse_match_recognize_measure) 2966 if self._match_text_seq("MEASURES") 2967 else None 2968 ) 2969 2970 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2971 rows = exp.var("ONE ROW PER MATCH") 2972 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2973 text = "ALL ROWS PER MATCH" 2974 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2975 text += " SHOW EMPTY MATCHES" 2976 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2977 text += " OMIT EMPTY MATCHES" 2978 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2979 text += " WITH UNMATCHED ROWS" 2980 rows = exp.var(text) 2981 else: 2982 rows = None 2983 2984 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2985 text = "AFTER MATCH SKIP" 2986 if self._match_text_seq("PAST", "LAST", "ROW"): 2987 text += " PAST LAST ROW" 2988 elif self._match_text_seq("TO", "NEXT", "ROW"): 2989 text += " TO NEXT ROW" 2990 elif self._match_text_seq("TO", "FIRST"): 2991 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2992 elif self._match_text_seq("TO", "LAST"): 2993 text += f" TO LAST {self._advance_any().text}" # type: ignore 2994 after = exp.var(text) 2995 else: 2996 after = None 2997 2998 if self._match_text_seq("PATTERN"): 2999 self._match_l_paren() 3000 3001 if not self._curr: 3002 self.raise_error("Expecting )", self._curr) 3003 3004 paren = 1 3005 start = self._curr 3006 3007 while self._curr and paren > 0: 3008 if self._curr.token_type == TokenType.L_PAREN: 3009 paren += 1 3010 if self._curr.token_type == TokenType.R_PAREN: 3011 paren -= 1 3012 3013 end = self._prev 3014 self._advance() 3015 3016 if paren > 0: 3017 self.raise_error("Expecting )", self._curr) 3018 3019 pattern = exp.var(self._find_sql(start, end)) 3020 else: 3021 pattern = None 3022 3023 define = ( 3024 self._parse_csv(self._parse_name_as_expression) 3025 if self._match_text_seq("DEFINE") 3026 else None 3027 ) 3028 3029 self._match_r_paren() 3030 3031 return self.expression( 3032 exp.MatchRecognize, 3033 partition_by=partition, 3034 order=order, 3035 measures=measures, 3036 rows=rows, 3037 after=after, 3038 pattern=pattern, 3039 define=define, 3040 alias=self._parse_table_alias(), 3041 ) 3042 3043 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3044 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3045 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3046 cross_apply = False 3047 3048 if cross_apply is not None: 3049 this = self._parse_select(table=True) 3050 view = None 3051 outer = None 3052 elif self._match(TokenType.LATERAL): 3053 this = self._parse_select(table=True) 3054 view = self._match(TokenType.VIEW) 3055 outer = self._match(TokenType.OUTER) 3056 else: 3057 return None 3058 3059 if not this: 3060 this = ( 3061 self._parse_unnest() 3062 or self._parse_function() 3063 or self._parse_id_var(any_token=False) 3064 ) 3065 3066 while self._match(TokenType.DOT): 3067 this = exp.Dot( 3068 this=this, 3069 expression=self._parse_function() or self._parse_id_var(any_token=False), 3070 ) 3071 3072 if view: 3073 table = self._parse_id_var(any_token=False) 3074 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3075 table_alias: t.Optional[exp.TableAlias] = self.expression( 3076 exp.TableAlias, this=table, columns=columns 3077 ) 3078 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3079 # We move the alias from the lateral's child node to 
the lateral itself 3080 table_alias = this.args["alias"].pop() 3081 else: 3082 table_alias = self._parse_table_alias() 3083 3084 return self.expression( 3085 exp.Lateral, 3086 this=this, 3087 view=view, 3088 outer=outer, 3089 alias=table_alias, 3090 cross_apply=cross_apply, 3091 ) 3092 3093 def _parse_join_parts( 3094 self, 3095 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3096 return ( 3097 self._match_set(self.JOIN_METHODS) and self._prev, 3098 self._match_set(self.JOIN_SIDES) and self._prev, 3099 self._match_set(self.JOIN_KINDS) and self._prev, 3100 ) 3101 3102 def _parse_join( 3103 self, skip_join_token: bool = False, parse_bracket: bool = False 3104 ) -> t.Optional[exp.Join]: 3105 if self._match(TokenType.COMMA): 3106 return self.expression(exp.Join, this=self._parse_table()) 3107 3108 index = self._index 3109 method, side, kind = self._parse_join_parts() 3110 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3111 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3112 3113 if not skip_join_token and not join: 3114 self._retreat(index) 3115 kind = None 3116 method = None 3117 side = None 3118 3119 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3120 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3121 3122 if not skip_join_token and not join and not outer_apply and not cross_apply: 3123 return None 3124 3125 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3126 3127 if method: 3128 kwargs["method"] = method.text 3129 if side: 3130 kwargs["side"] = side.text 3131 if kind: 3132 kwargs["kind"] = kind.text 3133 if hint: 3134 kwargs["hint"] = hint 3135 3136 if self._match(TokenType.MATCH_CONDITION): 3137 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3138 3139 if self._match(TokenType.ON): 3140 kwargs["on"] = self._parse_assignment() 3141 elif self._match(TokenType.USING): 3142 kwargs["using"] = self._parse_wrapped_id_vars() 3143 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3144 kind and kind.token_type == TokenType.CROSS 3145 ): 3146 index = self._index 3147 joins: t.Optional[list] = list(self._parse_joins()) 3148 3149 if joins and self._match(TokenType.ON): 3150 kwargs["on"] = self._parse_assignment() 3151 elif joins and self._match(TokenType.USING): 3152 kwargs["using"] = self._parse_wrapped_id_vars() 3153 else: 3154 joins = None 3155 self._retreat(index) 3156 3157 kwargs["this"].set("joins", joins if joins else None) 3158 3159 comments = [c for token in (method, side, kind) if token for c in token.comments] 3160 return self.expression(exp.Join, comments=comments, **kwargs) 3161 3162 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3163 this = self._parse_assignment() 3164 3165 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3166 return this 3167 3168 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3169 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3170 3171 return this 3172 3173 def _parse_index_params(self) -> exp.IndexParameters: 3174 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3175 3176 if self._match(TokenType.L_PAREN, advance=False): 3177 columns = self._parse_wrapped_csv(self._parse_with_operator) 3178 else: 3179 columns = None 3180 3181 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3182 partition_by = 
self._parse_partition_by() 3183 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3184 tablespace = ( 3185 self._parse_var(any_token=True) 3186 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3187 else None 3188 ) 3189 where = self._parse_where() 3190 3191 return self.expression( 3192 exp.IndexParameters, 3193 using=using, 3194 columns=columns, 3195 include=include, 3196 partition_by=partition_by, 3197 where=where, 3198 with_storage=with_storage, 3199 tablespace=tablespace, 3200 ) 3201 3202 def _parse_index( 3203 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3204 ) -> t.Optional[exp.Index]: 3205 if index or anonymous: 3206 unique = None 3207 primary = None 3208 amp = None 3209 3210 self._match(TokenType.ON) 3211 self._match(TokenType.TABLE) # hive 3212 table = self._parse_table_parts(schema=True) 3213 else: 3214 unique = self._match(TokenType.UNIQUE) 3215 primary = self._match_text_seq("PRIMARY") 3216 amp = self._match_text_seq("AMP") 3217 3218 if not self._match(TokenType.INDEX): 3219 return None 3220 3221 index = self._parse_id_var() 3222 table = None 3223 3224 params = self._parse_index_params() 3225 3226 return self.expression( 3227 exp.Index, 3228 this=index, 3229 table=table, 3230 unique=unique, 3231 primary=primary, 3232 amp=amp, 3233 params=params, 3234 ) 3235 3236 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3237 hints: t.List[exp.Expression] = [] 3238 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3239 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3240 hints.append( 3241 self.expression( 3242 exp.WithTableHint, 3243 expressions=self._parse_csv( 3244 lambda: self._parse_function() or self._parse_var(any_token=True) 3245 ), 3246 ) 3247 ) 3248 self._match_r_paren() 3249 else: 3250 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3251 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3252 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3253 3254 self._match_set((TokenType.INDEX, TokenType.KEY)) 3255 if self._match(TokenType.FOR): 3256 hint.set("target", self._advance_any() and self._prev.text.upper()) 3257 3258 hint.set("expressions", self._parse_wrapped_id_vars()) 3259 hints.append(hint) 3260 3261 return hints or None 3262 3263 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3264 return ( 3265 (not schema and self._parse_function(optional_parens=False)) 3266 or self._parse_id_var(any_token=False) 3267 or self._parse_string_as_identifier() 3268 or self._parse_placeholder() 3269 ) 3270 3271 def _parse_table_parts( 3272 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3273 ) -> exp.Table: 3274 catalog = None 3275 db = None 3276 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3277 3278 while self._match(TokenType.DOT): 3279 if catalog: 3280 # This allows nesting the table in arbitrarily many dot expressions if needed 3281 table = self.expression( 3282 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3283 ) 3284 else: 3285 catalog = db 3286 db = table 3287 # "" used for tsql FROM a..b case 3288 table = self._parse_table_part(schema=schema) or "" 3289 3290 if ( 3291 wildcard 3292 and self._is_connected() 3293 and (isinstance(table, exp.Identifier) or not table) 3294 and self._match(TokenType.STAR) 3295 ): 3296 if isinstance(table, exp.Identifier): 3297 table.args["this"] += "*" 3298 else: 3299 table = 
exp.Identifier(this="*") 3300 3301 # We bubble up comments from the Identifier to the Table 3302 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3303 3304 if is_db_reference: 3305 catalog = db 3306 db = table 3307 table = None 3308 3309 if not table and not is_db_reference: 3310 self.raise_error(f"Expected table name but got {self._curr}") 3311 if not db and is_db_reference: 3312 self.raise_error(f"Expected database name but got {self._curr}") 3313 3314 return self.expression( 3315 exp.Table, 3316 comments=comments, 3317 this=table, 3318 db=db, 3319 catalog=catalog, 3320 pivots=self._parse_pivots(), 3321 ) 3322 3323 def _parse_table( 3324 self, 3325 schema: bool = False, 3326 joins: bool = False, 3327 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3328 parse_bracket: bool = False, 3329 is_db_reference: bool = False, 3330 parse_partition: bool = False, 3331 ) -> t.Optional[exp.Expression]: 3332 lateral = self._parse_lateral() 3333 if lateral: 3334 return lateral 3335 3336 unnest = self._parse_unnest() 3337 if unnest: 3338 return unnest 3339 3340 values = self._parse_derived_table_values() 3341 if values: 3342 return values 3343 3344 subquery = self._parse_select(table=True) 3345 if subquery: 3346 if not subquery.args.get("pivots"): 3347 subquery.set("pivots", self._parse_pivots()) 3348 return subquery 3349 3350 bracket = parse_bracket and self._parse_bracket(None) 3351 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3352 3353 only = self._match(TokenType.ONLY) 3354 3355 this = t.cast( 3356 exp.Expression, 3357 bracket 3358 or self._parse_bracket( 3359 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3360 ), 3361 ) 3362 3363 if only: 3364 this.set("only", only) 3365 3366 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3367 self._match_text_seq("*") 3368 3369 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3370 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3371 this.set("partition", self._parse_partition()) 3372 3373 if schema: 3374 return self._parse_schema(this=this) 3375 3376 version = self._parse_version() 3377 3378 if version: 3379 this.set("version", version) 3380 3381 if self.dialect.ALIAS_POST_TABLESAMPLE: 3382 table_sample = self._parse_table_sample() 3383 3384 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3385 if alias: 3386 this.set("alias", alias) 3387 3388 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3389 return self.expression( 3390 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3391 ) 3392 3393 this.set("hints", self._parse_table_hints()) 3394 3395 if not this.args.get("pivots"): 3396 this.set("pivots", self._parse_pivots()) 3397 3398 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3399 table_sample = self._parse_table_sample() 3400 3401 if table_sample: 3402 table_sample.set("this", this) 3403 this = table_sample 3404 3405 if joins: 3406 for join in self._parse_joins(): 3407 this.append("joins", join) 3408 3409 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3410 this.set("ordinality", True) 3411 this.set("alias", self._parse_table_alias()) 3412 3413 return this 3414 3415 def _parse_version(self) -> t.Optional[exp.Version]: 3416 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3417 this = "TIMESTAMP" 3418 elif self._match(TokenType.VERSION_SNAPSHOT): 3419 this = "VERSION" 3420 else: 3421 return None 3422 3423 
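# The snapshot kind (TIMESTAMP or VERSION) was matched above; what follows is the range form: FROM/BETWEEN .. TO/AND, CONTAINED IN (..), ALL, or the default AS OF.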
if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3424 kind = self._prev.text.upper() 3425 start = self._parse_bitwise() 3426 self._match_texts(("TO", "AND")) 3427 end = self._parse_bitwise() 3428 expression: t.Optional[exp.Expression] = self.expression( 3429 exp.Tuple, expressions=[start, end] 3430 ) 3431 elif self._match_text_seq("CONTAINED", "IN"): 3432 kind = "CONTAINED IN" 3433 expression = self.expression( 3434 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3435 ) 3436 elif self._match(TokenType.ALL): 3437 kind = "ALL" 3438 expression = None 3439 else: 3440 self._match_text_seq("AS", "OF") 3441 kind = "AS OF" 3442 expression = self._parse_type() 3443 3444 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3445 3446 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3447 if not self._match(TokenType.UNNEST): 3448 return None 3449 3450 expressions = self._parse_wrapped_csv(self._parse_equality) 3451 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3452 3453 alias = self._parse_table_alias() if with_alias else None 3454 3455 if alias: 3456 if self.dialect.UNNEST_COLUMN_ONLY: 3457 if alias.args.get("columns"): 3458 self.raise_error("Unexpected extra column alias in unnest.") 3459 3460 alias.set("columns", [alias.this]) 3461 alias.set("this", None) 3462 3463 columns = alias.args.get("columns") or [] 3464 if offset and len(expressions) < len(columns): 3465 offset = columns.pop() 3466 3467 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3468 self._match(TokenType.ALIAS) 3469 offset = self._parse_id_var( 3470 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3471 ) or exp.to_identifier("offset") 3472 3473 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3474 3475 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3476 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3477 if not is_derived and not self._match_text_seq("VALUES"): 3478 return None 3479 3480 expressions = self._parse_csv(self._parse_value) 3481 alias = self._parse_table_alias() 3482 3483 if is_derived: 3484 self._match_r_paren() 3485 3486 return self.expression( 3487 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3488 ) 3489 3490 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3491 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3492 as_modifier and self._match_text_seq("USING", "SAMPLE") 3493 ): 3494 return None 3495 3496 bucket_numerator = None 3497 bucket_denominator = None 3498 bucket_field = None 3499 percent = None 3500 size = None 3501 seed = None 3502 3503 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3504 matched_l_paren = self._match(TokenType.L_PAREN) 3505 3506 if self.TABLESAMPLE_CSV: 3507 num = None 3508 expressions = self._parse_csv(self._parse_primary) 3509 else: 3510 expressions = None 3511 num = ( 3512 self._parse_factor() 3513 if self._match(TokenType.NUMBER, advance=False) 3514 else self._parse_primary() or self._parse_placeholder() 3515 ) 3516 3517 if self._match_text_seq("BUCKET"): 3518 bucket_numerator = self._parse_number() 3519 self._match_text_seq("OUT", "OF") 3520 bucket_denominator = self._parse_number() 3521 self._match(TokenType.ON) 3522 bucket_field = self._parse_field() 3523 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3524 percent = num 3525 elif self._match(TokenType.ROWS) or
not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3526 size = num 3527 else: 3528 percent = num 3529 3530 if matched_l_paren: 3531 self._match_r_paren() 3532 3533 if self._match(TokenType.L_PAREN): 3534 method = self._parse_var(upper=True) 3535 seed = self._match(TokenType.COMMA) and self._parse_number() 3536 self._match_r_paren() 3537 elif self._match_texts(("SEED", "REPEATABLE")): 3538 seed = self._parse_wrapped(self._parse_number) 3539 3540 if not method and self.DEFAULT_SAMPLING_METHOD: 3541 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3542 3543 return self.expression( 3544 exp.TableSample, 3545 expressions=expressions, 3546 method=method, 3547 bucket_numerator=bucket_numerator, 3548 bucket_denominator=bucket_denominator, 3549 bucket_field=bucket_field, 3550 percent=percent, 3551 size=size, 3552 seed=seed, 3553 ) 3554 3555 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3556 return list(iter(self._parse_pivot, None)) or None 3557 3558 def _parse_joins(self) -> t.Iterator[exp.Join]: 3559 return iter(self._parse_join, None) 3560 3561 # https://duckdb.org/docs/sql/statements/pivot 3562 def _parse_simplified_pivot(self) -> exp.Pivot: 3563 def _parse_on() -> t.Optional[exp.Expression]: 3564 this = self._parse_bitwise() 3565 return self._parse_in(this) if self._match(TokenType.IN) else this 3566 3567 this = self._parse_table() 3568 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3569 using = self._match(TokenType.USING) and self._parse_csv( 3570 lambda: self._parse_alias(self._parse_function()) 3571 ) 3572 group = self._parse_group() 3573 return self.expression( 3574 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3575 ) 3576 3577 def _parse_pivot_in(self) -> exp.In: 3578 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3579 this = self._parse_assignment() 3580 3581 self._match(TokenType.ALIAS) 3582 alias = self._parse_field() 3583 if alias: 3584 return self.expression(exp.PivotAlias, this=this, alias=alias) 3585 3586 return this 3587 3588 value = self._parse_column() 3589 3590 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3591 self.raise_error("Expecting IN (") 3592 3593 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3594 3595 self._match_r_paren() 3596 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3597 3598 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3599 index = self._index 3600 include_nulls = None 3601 3602 if self._match(TokenType.PIVOT): 3603 unpivot = False 3604 elif self._match(TokenType.UNPIVOT): 3605 unpivot = True 3606 3607 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3608 if self._match_text_seq("INCLUDE", "NULLS"): 3609 include_nulls = True 3610 elif self._match_text_seq("EXCLUDE", "NULLS"): 3611 include_nulls = False 3612 else: 3613 return None 3614 3615 expressions = [] 3616 3617 if not self._match(TokenType.L_PAREN): 3618 self._retreat(index) 3619 return None 3620 3621 if unpivot: 3622 expressions = self._parse_csv(self._parse_column) 3623 else: 3624 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3625 3626 if not expressions: 3627 self.raise_error("Failed to parse PIVOT's aggregation list") 3628 3629 if not self._match(TokenType.FOR): 3630 self.raise_error("Expecting FOR") 3631 3632 field = self._parse_pivot_in() 3633 3634 self._match_r_paren() 3635 3636 pivot = self.expression( 3637 exp.Pivot, 3638 expressions=expressions, 3639 field=field, 3640 
unpivot=unpivot, 3641 include_nulls=include_nulls, 3642 ) 3643 3644 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3645 pivot.set("alias", self._parse_table_alias()) 3646 3647 if not unpivot: 3648 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3649 3650 columns: t.List[exp.Expression] = [] 3651 for fld in pivot.args["field"].expressions: 3652 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3653 for name in names: 3654 if self.PREFIXED_PIVOT_COLUMNS: 3655 name = f"{name}_{field_name}" if name else field_name 3656 else: 3657 name = f"{field_name}_{name}" if name else field_name 3658 3659 columns.append(exp.to_identifier(name)) 3660 3661 pivot.set("columns", columns) 3662 3663 return pivot 3664 3665 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3666 return [agg.alias for agg in aggregations] 3667 3668 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3669 if not skip_where_token and not self._match(TokenType.PREWHERE): 3670 return None 3671 3672 return self.expression( 3673 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3674 ) 3675 3676 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3677 if not skip_where_token and not self._match(TokenType.WHERE): 3678 return None 3679 3680 return self.expression( 3681 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3682 ) 3683 3684 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3685 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3686 return None 3687 3688 elements: t.Dict[str, t.Any] = defaultdict(list) 3689 3690 if self._match(TokenType.ALL): 3691 elements["all"] = True 3692 elif self._match(TokenType.DISTINCT): 3693 elements["all"] = False 3694 3695 while True: 3696 expressions = self._parse_csv( 3697 lambda: None 3698 if self._match(TokenType.ROLLUP, advance=False) 3699 else self._parse_assignment() 3700 ) 3701 if expressions: 3702 elements["expressions"].extend(expressions) 3703 3704 grouping_sets = self._parse_grouping_sets() 3705 if grouping_sets: 3706 elements["grouping_sets"].extend(grouping_sets) 3707 3708 rollup = None 3709 cube = None 3710 totals = None 3711 3712 index = self._index 3713 with_ = self._match(TokenType.WITH) 3714 if self._match(TokenType.ROLLUP): 3715 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3716 elements["rollup"].extend(ensure_list(rollup)) 3717 3718 if self._match(TokenType.CUBE): 3719 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3720 elements["cube"].extend(ensure_list(cube)) 3721 3722 if self._match_text_seq("TOTALS"): 3723 totals = True 3724 elements["totals"] = True # type: ignore 3725 3726 if not (grouping_sets or rollup or cube or totals): 3727 if with_: 3728 self._retreat(index) 3729 break 3730 3731 return self.expression(exp.Group, **elements) # type: ignore 3732 3733 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3734 if not self._match(TokenType.GROUPING_SETS): 3735 return None 3736 3737 return self._parse_wrapped_csv(self._parse_grouping_set) 3738 3739 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3740 if self._match(TokenType.L_PAREN): 3741 grouping_set = self._parse_csv(self._parse_column) 3742 self._match_r_paren() 3743 return self.expression(exp.Tuple, expressions=grouping_set) 3744 3745 return self._parse_column() 3746 3747 def 
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )
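
    # The nulls_first flag computed in _parse_ordered below makes each dialect's
    # implicit NULL placement explicit in the AST. For example (illustrative),
    # under "nulls_are_small" semantics a plain ORDER BY x sorts NULLs first even
    # though the query never says NULLS FIRST, so nulls_first is set to True.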
    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
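
    # Illustrative equivalents (not from the source) of the clauses parsed above:
    #
    #   LIMIT 10 OFFSET 5
    #   LIMIT 5, 10                        -- MySQL: offset before the row count
    #   FETCH FIRST 10 ROWS ONLY           -- ANSI FETCH, becomes exp.Fetch
    #   FOR UPDATE OF t1 NOWAIT            -- exp.Lock with wait=True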
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
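
    # The methods above form a classic precedence-climbing chain, from loosest to
    # tightest binding: assignment -> OR (disjunction) -> AND (conjunction) ->
    # equality -> comparison -> range (IN/BETWEEN/LIKE/IS, below) -> bitwise ->
    # term -> factor -> unary, each level delegating to the next via _parse_tokens.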
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
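
    # Illustrative inputs (not from the source) for the range parsers above:
    #
    #   x IN (1, 2, 3)              -- expressions arg of exp.In
    #   x IN (SELECT id FROM t)     -- query arg (subquery)
    #   x IN UNNEST(arr)            -- unnest arg (BigQuery-style)
    #   x BETWEEN 1 AND 10
    #   x LIKE 'a!%' ESCAPE '!'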
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
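
    # Illustrative behavior (not from the source): _parse_type below first tries
    # to read a data type, so DECIMAL(38, 0) '1.5' in a literal context becomes a
    # cast, while an expression like x(10) fails the type check and falls back to
    # column/function parsing through the retreat logic.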
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None
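
            # e.g. Materialize's MAP[TEXT => INT] (illustrative): the key type was
            # parsed above, the => arrow has just been consumed, and the value
            # type is parsed next.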
            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )
        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = (
            self._parse_type(parse_interval=False, fallback_to_identifier=True)
            or self._parse_id_var()
        )
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
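
        # Illustrative input (not from the source): a Snowflake-style col:a.b::string,
        # where the loop below collects the path segments a and b and re-attaches
        # the trailing :: casts outside the JSON extraction.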
        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes
            # TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

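            # Past this point the call is an ordinary function: resolve a builder
            # from FUNCTIONS (e.g. "MOD" -> build_mod) or fall back to
            # exp.Anonymous for names the parser doesn't know.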
            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
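
    # Illustrative inputs (not from the source) for _parse_lambda below:
    #
    #   x -> x + 1
    #   (acc, x) -> acc + x
    #
    # When no lambda arrow follows, the parser retreats and re-parses the tokens
    # as a regular expression or SELECT.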
    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))
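
    # Illustrative example (not from the source): AUTO_INCREMENT(100, 10) and
    # START 100 INCREMENT 10 above both yield GeneratedAsIdentityColumnConstraint,
    # while a bare AUTO_INCREMENT becomes AutoIncrementColumnConstraint.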
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )
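
    # Illustrative example (not from the source): a column defined as
    #   GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2 NO CYCLE)
    # populates the start/increment/cycle args parsed above.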
    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
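
    # Illustrative example (not from the source):
    #   FOREIGN KEY (a, b) REFERENCES t (x, y) ON DELETE CASCADE
    # is parsed below into exp.ForeignKey, recording the action as the
    # option {"delete": "CASCADE"}.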
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = self.expression(exp.Array, expressions=expressions)
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return self.expression(constructor_type, expressions=expressions)

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this
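
    # Illustrative inputs (not from the source) for _parse_bracket above:
    #
    #   [1, 2, 3]      -> exp.Array
    #   {'a': 1}       -> exp.Struct (DuckDB-style struct literal)
    #   arr[0]         -> exp.Bracket, with the dialect's INDEX_OFFSET applied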
    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)
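
    # Illustrative example (not from the source):
    #   CAST('01-2024' AS DATE FORMAT 'MM-YYYY')
    # is rewritten by _parse_cast below into StrToDate, with the format string
    # translated through the dialect's FORMAT_MAPPING / TIME_MAPPING.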
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
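
    # Illustrative mapping (not from the source): _parse_decode below turns
    #   DECODE(x, 1, 'one', 2, 'two', 'other')
    # into CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END,
    # comparing NULL search values via IS NULL rather than equality.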
5442 """ 5443 args = self._parse_csv(self._parse_assignment) 5444 5445 if len(args) < 3: 5446 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5447 5448 expression, *expressions = args 5449 if not expression: 5450 return None 5451 5452 ifs = [] 5453 for search, result in zip(expressions[::2], expressions[1::2]): 5454 if not search or not result: 5455 return None 5456 5457 if isinstance(search, exp.Literal): 5458 ifs.append( 5459 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5460 ) 5461 elif isinstance(search, exp.Null): 5462 ifs.append( 5463 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5464 ) 5465 else: 5466 cond = exp.or_( 5467 exp.EQ(this=expression.copy(), expression=search), 5468 exp.and_( 5469 exp.Is(this=expression.copy(), expression=exp.Null()), 5470 exp.Is(this=search.copy(), expression=exp.Null()), 5471 copy=False, 5472 ), 5473 copy=False, 5474 ) 5475 ifs.append(exp.If(this=cond, true=result)) 5476 5477 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5478 5479 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5480 self._match_text_seq("KEY") 5481 key = self._parse_column() 5482 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5483 self._match_text_seq("VALUE") 5484 value = self._parse_bitwise() 5485 5486 if not key and not value: 5487 return None 5488 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5489 5490 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5491 if not this or not self._match_text_seq("FORMAT", "JSON"): 5492 return this 5493 5494 return self.expression(exp.FormatJson, this=this) 5495 5496 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5497 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5498 for value in values: 5499 if self._match_text_seq(value, "ON", on): 5500 return f"{value} ON {on}" 5501 5502 return None 5503 5504 @t.overload 5505 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5506 5507 @t.overload 5508 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5509 5510 def _parse_json_object(self, agg=False): 5511 star = self._parse_star() 5512 expressions = ( 5513 [star] 5514 if star 5515 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5516 ) 5517 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5518 5519 unique_keys = None 5520 if self._match_text_seq("WITH", "UNIQUE"): 5521 unique_keys = True 5522 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5523 unique_keys = False 5524 5525 self._match_text_seq("KEYS") 5526 5527 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5528 self._parse_type() 5529 ) 5530 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5531 5532 return self.expression( 5533 exp.JSONObjectAgg if agg else exp.JSONObject, 5534 expressions=expressions, 5535 null_handling=null_handling, 5536 unique_keys=unique_keys, 5537 return_type=return_type, 5538 encoding=encoding, 5539 ) 5540 5541 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5542 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5543 if not self._match_text_seq("NESTED"): 5544 this = self._parse_id_var() 5545 kind = self._parse_types(allow_identifiers=False) 5546 nested = None 5547 else: 5548 this = None 5549 kind = None 5550 nested = True 5551 5552 path = self._match_text_seq("PATH") and self._parse_string() 5553 nested_schema = nested and self._parse_json_schema() 5554 5555 return self.expression( 5556 exp.JSONColumnDef, 5557 this=this, 5558 kind=kind, 5559 path=path, 5560 nested_schema=nested_schema, 5561 ) 5562 5563 def _parse_json_schema(self) -> exp.JSONSchema: 5564 self._match_text_seq("COLUMNS") 5565 return self.expression( 5566 exp.JSONSchema, 5567 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5568 ) 5569 5570 def _parse_json_table(self) -> exp.JSONTable: 5571 this = self._parse_format_json(self._parse_bitwise()) 5572 path = self._match(TokenType.COMMA) and self._parse_string() 5573 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5574 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5575 schema = self._parse_json_schema() 5576 5577 return exp.JSONTable( 5578 this=this, 5579 schema=schema, 5580 path=path, 5581 error_handling=error_handling, 5582 empty_handling=empty_handling, 5583 ) 5584 5585 def _parse_match_against(self) -> exp.MatchAgainst: 5586 expressions = self._parse_csv(self._parse_column) 5587 5588 self._match_text_seq(")", "AGAINST", "(") 5589 5590 this = self._parse_string() 5591 5592 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5593 modifier = "IN NATURAL LANGUAGE MODE" 5594 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5595 modifier = f"{modifier} WITH QUERY EXPANSION" 5596 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5597 modifier = "IN BOOLEAN MODE" 5598 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5599 modifier = "WITH QUERY EXPANSION" 5600 else: 5601 modifier = None 5602 5603 return self.expression( 5604 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5605 ) 5606 5607 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5608 def _parse_open_json(self) -> exp.OpenJSON: 5609 this = self._parse_bitwise() 5610 path = self._match(TokenType.COMMA) and self._parse_string() 5611 5612 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5613 this = self._parse_field(any_token=True) 5614 kind = self._parse_types() 5615 path = 
self._parse_string() 5616 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5617 5618 return self.expression( 5619 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5620 ) 5621 5622 expressions = None 5623 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5624 self._match_l_paren() 5625 expressions = self._parse_csv(_parse_open_json_column_def) 5626 5627 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5628 5629 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5630 args = self._parse_csv(self._parse_bitwise) 5631 5632 if self._match(TokenType.IN): 5633 return self.expression( 5634 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5635 ) 5636 5637 if haystack_first: 5638 haystack = seq_get(args, 0) 5639 needle = seq_get(args, 1) 5640 else: 5641 needle = seq_get(args, 0) 5642 haystack = seq_get(args, 1) 5643 5644 return self.expression( 5645 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5646 ) 5647 5648 def _parse_predict(self) -> exp.Predict: 5649 self._match_text_seq("MODEL") 5650 this = self._parse_table() 5651 5652 self._match(TokenType.COMMA) 5653 self._match_text_seq("TABLE") 5654 5655 return self.expression( 5656 exp.Predict, 5657 this=this, 5658 expression=self._parse_table(), 5659 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5660 ) 5661 5662 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5663 args = self._parse_csv(self._parse_table) 5664 return exp.JoinHint(this=func_name.upper(), expressions=args) 5665 5666 def _parse_substring(self) -> exp.Substring: 5667 # Postgres supports the form: substring(string [from int] [for int]) 5668 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5669 5670 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5671 5672 if self._match(TokenType.FROM): 5673 args.append(self._parse_bitwise()) 5674 if self._match(TokenType.FOR): 5675 if len(args) == 1: 5676 args.append(exp.Literal.number(1)) 5677 args.append(self._parse_bitwise()) 5678 5679 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5680 5681 def _parse_trim(self) -> exp.Trim: 5682 # https://www.w3resource.com/sql/character-functions/trim.php 5683 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5684 5685 position = None 5686 collation = None 5687 expression = None 5688 5689 if self._match_texts(self.TRIM_TYPES): 5690 position = self._prev.text.upper() 5691 5692 this = self._parse_bitwise() 5693 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5694 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5695 expression = self._parse_bitwise() 5696 5697 if invert_order: 5698 this, expression = expression, this 5699 5700 if self._match(TokenType.COLLATE): 5701 collation = self._parse_bitwise() 5702 5703 return self.expression( 5704 exp.Trim, this=this, position=position, expression=expression, collation=collation 5705 ) 5706 5707 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5708 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5709 5710 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5711 return self._parse_window(self._parse_id_var(), alias=True) 5712 5713 def _parse_respect_or_ignore_nulls( 5714 self, this: t.Optional[exp.Expression] 5715 ) -> t.Optional[exp.Expression]: 5716 if self._match_text_seq("IGNORE", "NULLS"): 
5717 return self.expression(exp.IgnoreNulls, this=this) 5718 if self._match_text_seq("RESPECT", "NULLS"): 5719 return self.expression(exp.RespectNulls, this=this) 5720 return this 5721 5722 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5723 if self._match(TokenType.HAVING): 5724 self._match_texts(("MAX", "MIN")) 5725 max = self._prev.text.upper() != "MIN" 5726 return self.expression( 5727 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5728 ) 5729 5730 return this 5731 5732 def _parse_window( 5733 self, this: t.Optional[exp.Expression], alias: bool = False 5734 ) -> t.Optional[exp.Expression]: 5735 func = this 5736 comments = func.comments if isinstance(func, exp.Expression) else None 5737 5738 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5739 self._match(TokenType.WHERE) 5740 this = self.expression( 5741 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5742 ) 5743 self._match_r_paren() 5744 5745 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5746 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5747 if self._match_text_seq("WITHIN", "GROUP"): 5748 order = self._parse_wrapped(self._parse_order) 5749 this = self.expression(exp.WithinGroup, this=this, expression=order) 5750 5751 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5752 # Some dialects choose to implement and some do not. 5753 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5754 5755 # There is some code above in _parse_lambda that handles 5756 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5757 5758 # The below changes handle 5759 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5760 5761 # Oracle allows both formats 5762 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5763 # and Snowflake chose to do the same for familiarity 5764 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5765 if isinstance(this, exp.AggFunc): 5766 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5767 5768 if ignore_respect and ignore_respect is not this: 5769 ignore_respect.replace(ignore_respect.this) 5770 this = self.expression(ignore_respect.__class__, this=this) 5771 5772 this = self._parse_respect_or_ignore_nulls(this) 5773 5774 # bigquery select from window x AS (partition by ...) 
5775 if alias: 5776 over = None 5777 self._match(TokenType.ALIAS) 5778 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5779 return this 5780 else: 5781 over = self._prev.text.upper() 5782 5783 if comments and isinstance(func, exp.Expression): 5784 func.pop_comments() 5785 5786 if not self._match(TokenType.L_PAREN): 5787 return self.expression( 5788 exp.Window, 5789 comments=comments, 5790 this=this, 5791 alias=self._parse_id_var(False), 5792 over=over, 5793 ) 5794 5795 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5796 5797 first = self._match(TokenType.FIRST) 5798 if self._match_text_seq("LAST"): 5799 first = False 5800 5801 partition, order = self._parse_partition_and_order() 5802 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5803 5804 if kind: 5805 self._match(TokenType.BETWEEN) 5806 start = self._parse_window_spec() 5807 self._match(TokenType.AND) 5808 end = self._parse_window_spec() 5809 5810 spec = self.expression( 5811 exp.WindowSpec, 5812 kind=kind, 5813 start=start["value"], 5814 start_side=start["side"], 5815 end=end["value"], 5816 end_side=end["side"], 5817 ) 5818 else: 5819 spec = None 5820 5821 self._match_r_paren() 5822 5823 window = self.expression( 5824 exp.Window, 5825 comments=comments, 5826 this=this, 5827 partition_by=partition, 5828 order=order, 5829 spec=spec, 5830 alias=window_alias, 5831 over=over, 5832 first=first, 5833 ) 5834 5835 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5836 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5837 return self._parse_window(window, alias=alias) 5838 5839 return window 5840 5841 def _parse_partition_and_order( 5842 self, 5843 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5844 return self._parse_partition_by(), self._parse_order() 5845 5846 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5847 self._match(TokenType.BETWEEN) 5848 5849 return { 5850 "value": ( 5851 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5852 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5853 or self._parse_bitwise() 5854 ), 5855 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5856 } 5857 5858 def _parse_alias( 5859 self, this: t.Optional[exp.Expression], explicit: bool = False 5860 ) -> t.Optional[exp.Expression]: 5861 any_token = self._match(TokenType.ALIAS) 5862 comments = self._prev_comments or [] 5863 5864 if explicit and not any_token: 5865 return this 5866 5867 if self._match(TokenType.L_PAREN): 5868 aliases = self.expression( 5869 exp.Aliases, 5870 comments=comments, 5871 this=this, 5872 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5873 ) 5874 self._match_r_paren(aliases) 5875 return aliases 5876 5877 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5878 self.STRING_ALIASES and self._parse_string_as_identifier() 5879 ) 5880 5881 if alias: 5882 comments.extend(alias.pop_comments()) 5883 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5884 column = this.this 5885 5886 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5887 if not this.comments and column and column.comments: 5888 this.comments = column.pop_comments() 5889 5890 return this 5891 5892 def _parse_id_var( 5893 self, 5894 any_token: bool = True, 5895 tokens: t.Optional[t.Collection[TokenType]] = None, 5896 ) -> t.Optional[exp.Expression]: 5897 expression = self._parse_identifier() 5898 if 
not expression and ( 5899 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5900 ): 5901 quoted = self._prev.token_type == TokenType.STRING 5902 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5903 5904 return expression 5905 5906 def _parse_string(self) -> t.Optional[exp.Expression]: 5907 if self._match_set(self.STRING_PARSERS): 5908 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5909 return self._parse_placeholder() 5910 5911 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5912 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5913 5914 def _parse_number(self) -> t.Optional[exp.Expression]: 5915 if self._match_set(self.NUMERIC_PARSERS): 5916 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5917 return self._parse_placeholder() 5918 5919 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5920 if self._match(TokenType.IDENTIFIER): 5921 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5922 return self._parse_placeholder() 5923 5924 def _parse_var( 5925 self, 5926 any_token: bool = False, 5927 tokens: t.Optional[t.Collection[TokenType]] = None, 5928 upper: bool = False, 5929 ) -> t.Optional[exp.Expression]: 5930 if ( 5931 (any_token and self._advance_any()) 5932 or self._match(TokenType.VAR) 5933 or (self._match_set(tokens) if tokens else False) 5934 ): 5935 return self.expression( 5936 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5937 ) 5938 return self._parse_placeholder() 5939 5940 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5941 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5942 self._advance() 5943 return self._prev 5944 return None 5945 5946 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5947 return self._parse_var() or self._parse_string() 5948 5949 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5950 return self._parse_primary() or self._parse_var(any_token=True) 5951 5952 def _parse_null(self) -> t.Optional[exp.Expression]: 5953 if self._match_set(self.NULL_TOKENS): 5954 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5955 return self._parse_placeholder() 5956 5957 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5958 if self._match(TokenType.TRUE): 5959 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5960 if self._match(TokenType.FALSE): 5961 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5962 return self._parse_placeholder() 5963 5964 def _parse_star(self) -> t.Optional[exp.Expression]: 5965 if self._match(TokenType.STAR): 5966 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5967 return self._parse_placeholder() 5968 5969 def _parse_parameter(self) -> exp.Parameter: 5970 this = self._parse_identifier() or self._parse_primary_or_var() 5971 return self.expression(exp.Parameter, this=this) 5972 5973 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5974 if self._match_set(self.PLACEHOLDER_PARSERS): 5975 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5976 if placeholder: 5977 return placeholder 5978 self._advance(-1) 5979 return None 5980 5981 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5982 if not self._match_texts(keywords): 5983 return None 5984 if self._match(TokenType.L_PAREN, advance=False): 5985 return 
self._parse_wrapped_csv(self._parse_expression) 5986 5987 expression = self._parse_expression() 5988 return [expression] if expression else None 5989 5990 def _parse_csv( 5991 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5992 ) -> t.List[exp.Expression]: 5993 parse_result = parse_method() 5994 items = [parse_result] if parse_result is not None else [] 5995 5996 while self._match(sep): 5997 self._add_comments(parse_result) 5998 parse_result = parse_method() 5999 if parse_result is not None: 6000 items.append(parse_result) 6001 6002 return items 6003 6004 def _parse_tokens( 6005 self, parse_method: t.Callable, expressions: t.Dict 6006 ) -> t.Optional[exp.Expression]: 6007 this = parse_method() 6008 6009 while self._match_set(expressions): 6010 this = self.expression( 6011 expressions[self._prev.token_type], 6012 this=this, 6013 comments=self._prev_comments, 6014 expression=parse_method(), 6015 ) 6016 6017 return this 6018 6019 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6020 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6021 6022 def _parse_wrapped_csv( 6023 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6024 ) -> t.List[exp.Expression]: 6025 return self._parse_wrapped( 6026 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6027 ) 6028 6029 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6030 wrapped = self._match(TokenType.L_PAREN) 6031 if not wrapped and not optional: 6032 self.raise_error("Expecting (") 6033 parse_result = parse_method() 6034 if wrapped: 6035 self._match_r_paren() 6036 return parse_result 6037 6038 def _parse_expressions(self) -> t.List[exp.Expression]: 6039 return self._parse_csv(self._parse_expression) 6040 6041 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6042 return self._parse_select() or self._parse_set_operations( 6043 self._parse_expression() if alias else self._parse_assignment() 6044 ) 6045 6046 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6047 return self._parse_query_modifiers( 6048 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6049 ) 6050 6051 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6052 this = None 6053 if self._match_texts(self.TRANSACTION_KIND): 6054 this = self._prev.text 6055 6056 self._match_texts(("TRANSACTION", "WORK")) 6057 6058 modes = [] 6059 while True: 6060 mode = [] 6061 while self._match(TokenType.VAR): 6062 mode.append(self._prev.text) 6063 6064 if mode: 6065 modes.append(" ".join(mode)) 6066 if not self._match(TokenType.COMMA): 6067 break 6068 6069 return self.expression(exp.Transaction, this=this, modes=modes) 6070 6071 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6072 chain = None 6073 savepoint = None 6074 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6075 6076 self._match_texts(("TRANSACTION", "WORK")) 6077 6078 if self._match_text_seq("TO"): 6079 self._match_text_seq("SAVEPOINT") 6080 savepoint = self._parse_id_var() 6081 6082 if self._match(TokenType.AND): 6083 chain = not self._match_text_seq("NO") 6084 self._match_text_seq("CHAIN") 6085 6086 if is_rollback: 6087 return self.expression(exp.Rollback, savepoint=savepoint) 6088 6089 return self.expression(exp.Commit, chain=chain) 6090 6091 def _parse_refresh(self) -> exp.Refresh: 6092 self._match(TokenType.TABLE) 6093 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6094 6095 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6096 if not self._match_text_seq("ADD"): 6097 return None 6098 6099 self._match(TokenType.COLUMN) 6100 exists_column = self._parse_exists(not_=True) 6101 expression = self._parse_field_def() 6102 6103 if expression: 6104 expression.set("exists", exists_column) 6105 6106 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6107 if self._match_texts(("FIRST", "AFTER")): 6108 position = self._prev.text 6109 column_position = self.expression( 6110 exp.ColumnPosition, this=self._parse_column(), position=position 6111 ) 6112 expression.set("position", column_position) 6113 6114 return expression 6115 6116 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6117 drop = self._match(TokenType.DROP) and self._parse_drop() 6118 if drop and not isinstance(drop, exp.Command): 6119 drop.set("kind", drop.args.get("kind", "COLUMN")) 6120 return drop 6121 6122 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6123 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6124 return self.expression( 6125 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6126 ) 6127 6128 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6129 index = self._index - 1 6130 6131 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6132 return self._parse_csv( 6133 lambda: self.expression( 6134 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6135 ) 6136 ) 6137 6138 self._retreat(index) 6139 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6140 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6141 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6142 6143 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6144 if self._match_texts(self.ALTER_ALTER_PARSERS): 6145 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6146 6147 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6148 # keyword after ALTER we default to parsing this statement 6149 self._match(TokenType.COLUMN) 6150 column = self._parse_field(any_token=True) 6151 6152 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6153 return self.expression(exp.AlterColumn, this=column, drop=True) 6154 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6155 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6156 if self._match(TokenType.COMMENT): 6157 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6158 if self._match_text_seq("DROP", "NOT", "NULL"): 6159 return self.expression( 6160 exp.AlterColumn, 6161 this=column, 6162 drop=True, 6163 allow_null=True, 6164 ) 6165 if self._match_text_seq("SET", "NOT", "NULL"): 6166 return self.expression( 6167 exp.AlterColumn, 6168 this=column, 6169 allow_null=False, 6170 ) 6171 self._match_text_seq("SET", "DATA") 6172 self._match_text_seq("TYPE") 6173 return self.expression( 6174 exp.AlterColumn, 6175 this=column, 6176 dtype=self._parse_types(), 6177 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6178 using=self._match(TokenType.USING) and self._parse_assignment(), 6179 ) 6180 6181 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6182 if self._match_texts(("ALL", "EVEN", "AUTO")): 6183 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6184 6185 self._match_text_seq("KEY", "DISTKEY") 6186 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6187 6188 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6189 if compound: 6190 self._match_text_seq("SORTKEY") 6191 6192 if self._match(TokenType.L_PAREN, advance=False): 6193 return self.expression( 6194 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6195 ) 6196 6197 self._match_texts(("AUTO", "NONE")) 6198 return self.expression( 6199 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6200 ) 6201 6202 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6203 index = self._index - 1 6204 6205 partition_exists = self._parse_exists() 6206 if self._match(TokenType.PARTITION, advance=False): 6207 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6208 6209 self._retreat(index) 6210 return self._parse_csv(self._parse_drop_column) 6211 6212 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6213 if self._match(TokenType.COLUMN): 6214 exists = self._parse_exists() 6215 old_column = self._parse_column() 6216 to = self._match_text_seq("TO") 6217 new_column = self._parse_column() 6218 6219 if old_column is None or to is None or new_column is None: 6220 return None 6221 6222 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6223 6224 self._match_text_seq("TO") 6225 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6226 6227 def _parse_alter_table_set(self) -> exp.AlterSet: 6228 alter_set = self.expression(exp.AlterSet) 6229 6230 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6231 "TABLE", "PROPERTIES" 6232 ): 6233 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6234 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6235 alter_set.set("expressions", [self._parse_assignment()]) 6236 elif self._match_texts(("LOGGED", "UNLOGGED")): 6237 alter_set.set("option", exp.var(self._prev.text.upper())) 6238 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6239 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6240 elif self._match_text_seq("LOCATION"): 6241 alter_set.set("location", self._parse_field()) 6242 elif self._match_text_seq("ACCESS", "METHOD"): 6243 alter_set.set("access_method", self._parse_field()) 6244 elif self._match_text_seq("TABLESPACE"): 6245 alter_set.set("tablespace", self._parse_field()) 6246 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6247 alter_set.set("file_format", [self._parse_field()]) 6248 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6249 alter_set.set("file_format", self._parse_wrapped_options()) 6250 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6251 alter_set.set("copy_options", self._parse_wrapped_options()) 6252 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6253 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6254 else: 6255 if self._match_text_seq("SERDE"): 6256 alter_set.set("serde", self._parse_field()) 6257 6258 alter_set.set("expressions", [self._parse_properties()]) 6259 6260 return alter_set 6261 6262 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6263 start = self._prev 6264 6265 if not self._match(TokenType.TABLE): 6266 return 
self._parse_as_command(start) 6267 6268 exists = self._parse_exists() 6269 only = self._match_text_seq("ONLY") 6270 this = self._parse_table(schema=True) 6271 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6272 6273 if self._next: 6274 self._advance() 6275 6276 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6277 if parser: 6278 actions = ensure_list(parser(self)) 6279 options = self._parse_csv(self._parse_property) 6280 6281 if not self._curr and actions: 6282 return self.expression( 6283 exp.AlterTable, 6284 this=this, 6285 exists=exists, 6286 actions=actions, 6287 only=only, 6288 options=options, 6289 cluster=cluster, 6290 ) 6291 6292 return self._parse_as_command(start) 6293 6294 def _parse_merge(self) -> exp.Merge: 6295 self._match(TokenType.INTO) 6296 target = self._parse_table() 6297 6298 if target and self._match(TokenType.ALIAS, advance=False): 6299 target.set("alias", self._parse_table_alias()) 6300 6301 self._match(TokenType.USING) 6302 using = self._parse_table() 6303 6304 self._match(TokenType.ON) 6305 on = self._parse_assignment() 6306 6307 return self.expression( 6308 exp.Merge, 6309 this=target, 6310 using=using, 6311 on=on, 6312 expressions=self._parse_when_matched(), 6313 ) 6314 6315 def _parse_when_matched(self) -> t.List[exp.When]: 6316 whens = [] 6317 6318 while self._match(TokenType.WHEN): 6319 matched = not self._match(TokenType.NOT) 6320 self._match_text_seq("MATCHED") 6321 source = ( 6322 False 6323 if self._match_text_seq("BY", "TARGET") 6324 else self._match_text_seq("BY", "SOURCE") 6325 ) 6326 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6327 6328 self._match(TokenType.THEN) 6329 6330 if self._match(TokenType.INSERT): 6331 _this = self._parse_star() 6332 if _this: 6333 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6334 else: 6335 then = self.expression( 6336 exp.Insert, 6337 this=self._parse_value(), 6338 expression=self._match_text_seq("VALUES") and self._parse_value(), 6339 ) 6340 elif self._match(TokenType.UPDATE): 6341 expressions = self._parse_star() 6342 if expressions: 6343 then = self.expression(exp.Update, expressions=expressions) 6344 else: 6345 then = self.expression( 6346 exp.Update, 6347 expressions=self._match(TokenType.SET) 6348 and self._parse_csv(self._parse_equality), 6349 ) 6350 elif self._match(TokenType.DELETE): 6351 then = self.expression(exp.Var, this=self._prev.text) 6352 else: 6353 then = None 6354 6355 whens.append( 6356 self.expression( 6357 exp.When, 6358 matched=matched, 6359 source=source, 6360 condition=condition, 6361 then=then, 6362 ) 6363 ) 6364 return whens 6365 6366 def _parse_show(self) -> t.Optional[exp.Expression]: 6367 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6368 if parser: 6369 return parser(self) 6370 return self._parse_as_command(self._prev) 6371 6372 def _parse_set_item_assignment( 6373 self, kind: t.Optional[str] = None 6374 ) -> t.Optional[exp.Expression]: 6375 index = self._index 6376 6377 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6378 return self._parse_set_transaction(global_=kind == "GLOBAL") 6379 6380 left = self._parse_primary() or self._parse_column() 6381 assignment_delimiter = self._match_texts(("=", "TO")) 6382 6383 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6384 self._retreat(index) 6385 return None 6386 6387 right = self._parse_statement() or self._parse_id_var() 6388 if isinstance(right, 
(exp.Column, exp.Identifier)): 6389 right = exp.var(right.name) 6390 6391 this = self.expression(exp.EQ, this=left, expression=right) 6392 return self.expression(exp.SetItem, this=this, kind=kind) 6393 6394 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6395 self._match_text_seq("TRANSACTION") 6396 characteristics = self._parse_csv( 6397 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6398 ) 6399 return self.expression( 6400 exp.SetItem, 6401 expressions=characteristics, 6402 kind="TRANSACTION", 6403 **{"global": global_}, # type: ignore 6404 ) 6405 6406 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6407 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6408 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6409 6410 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6411 index = self._index 6412 set_ = self.expression( 6413 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6414 ) 6415 6416 if self._curr: 6417 self._retreat(index) 6418 return self._parse_as_command(self._prev) 6419 6420 return set_ 6421 6422 def _parse_var_from_options( 6423 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6424 ) -> t.Optional[exp.Var]: 6425 start = self._curr 6426 if not start: 6427 return None 6428 6429 option = start.text.upper() 6430 continuations = options.get(option) 6431 6432 index = self._index 6433 self._advance() 6434 for keywords in continuations or []: 6435 if isinstance(keywords, str): 6436 keywords = (keywords,) 6437 6438 if self._match_text_seq(*keywords): 6439 option = f"{option} {' '.join(keywords)}" 6440 break 6441 else: 6442 if continuations or continuations is None: 6443 if raise_unmatched: 6444 self.raise_error(f"Unknown option {option}") 6445 6446 self._retreat(index) 6447 return None 6448 6449 return exp.var(option) 6450 6451 def _parse_as_command(self, start: Token) -> exp.Command: 6452 while self._curr: 6453 self._advance() 6454 text = self._find_sql(start, self._prev) 6455 size = len(start.text) 6456 self._warn_unsupported() 6457 return exp.Command(this=text[:size], expression=text[size:]) 6458 6459 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6460 settings = [] 6461 6462 self._match_l_paren() 6463 kind = self._parse_id_var() 6464 6465 if self._match(TokenType.L_PAREN): 6466 while True: 6467 key = self._parse_id_var() 6468 value = self._parse_primary() 6469 6470 if not key and value is None: 6471 break 6472 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6473 self._match(TokenType.R_PAREN) 6474 6475 self._match_r_paren() 6476 6477 return self.expression( 6478 exp.DictProperty, 6479 this=this, 6480 kind=kind.this if kind else None, 6481 settings=settings, 6482 ) 6483 6484 def _parse_dict_range(self, this: str) -> exp.DictRange: 6485 self._match_l_paren() 6486 has_min = self._match_text_seq("MIN") 6487 if has_min: 6488 min = self._parse_var() or self._parse_primary() 6489 self._match_text_seq("MAX") 6490 max = self._parse_var() or self._parse_primary() 6491 else: 6492 max = self._parse_var() or self._parse_primary() 6493 min = exp.Literal.number(0) 6494 self._match_r_paren() 6495 return self.expression(exp.DictRange, this=this, min=min, max=max) 6496 6497 def _parse_comprehension( 6498 self, this: t.Optional[exp.Expression] 6499 ) -> t.Optional[exp.Comprehension]: 6500 index = self._index 6501 expression = self._parse_column() 6502 if not 
self._match(TokenType.IN): 6503 self._retreat(index - 1) 6504 return None 6505 iterator = self._parse_column() 6506 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6507 return self.expression( 6508 exp.Comprehension, 6509 this=this, 6510 expression=expression, 6511 iterator=iterator, 6512 condition=condition, 6513 ) 6514 6515 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6516 if self._match(TokenType.HEREDOC_STRING): 6517 return self.expression(exp.Heredoc, this=self._prev.text) 6518 6519 if not self._match_text_seq("$"): 6520 return None 6521 6522 tags = ["$"] 6523 tag_text = None 6524 6525 if self._is_connected(): 6526 self._advance() 6527 tags.append(self._prev.text.upper()) 6528 else: 6529 self.raise_error("No closing $ found") 6530 6531 if tags[-1] != "$": 6532 if self._is_connected() and self._match_text_seq("$"): 6533 tag_text = tags[-1] 6534 tags.append("$") 6535 else: 6536 self.raise_error("No closing $ found") 6537 6538 heredoc_start = self._curr 6539 6540 while self._curr: 6541 if self._match_text_seq(*tags, advance=False): 6542 this = self._find_sql(heredoc_start, self._prev) 6543 self._advance(len(tags)) 6544 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6545 6546 self._advance() 6547 6548 self.raise_error(f"No closing {''.join(tags)} found") 6549 return None 6550 6551 def _find_parser( 6552 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6553 ) -> t.Optional[t.Callable]: 6554 if not self._curr: 6555 return None 6556 6557 index = self._index 6558 this = [] 6559 while True: 6560 # The current token might be multiple words 6561 curr = self._curr.text.upper() 6562 key = curr.split(" ") 6563 this.append(curr) 6564 6565 self._advance() 6566 result, trie = in_trie(trie, key) 6567 if result == TrieResult.FAILED: 6568 break 6569 6570 if result == TrieResult.EXISTS: 6571 subparser = parsers[" ".join(this)] 6572 return subparser 6573 6574 self._retreat(index) 6575 return None 6576 6577 def _match(self, token_type, advance=True, expression=None): 6578 if not self._curr: 6579 return None 6580 6581 if self._curr.token_type == token_type: 6582 if advance: 6583 self._advance() 6584 self._add_comments(expression) 6585 return True 6586 6587 return None 6588 6589 def _match_set(self, types, advance=True): 6590 if not self._curr: 6591 return None 6592 6593 if self._curr.token_type in types: 6594 if advance: 6595 self._advance() 6596 return True 6597 6598 return None 6599 6600 def _match_pair(self, token_type_a, token_type_b, advance=True): 6601 if not self._curr or not self._next: 6602 return None 6603 6604 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6605 if advance: 6606 self._advance(2) 6607 return True 6608 6609 return None 6610 6611 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6612 if not self._match(TokenType.L_PAREN, expression=expression): 6613 self.raise_error("Expecting (") 6614 6615 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6616 if not self._match(TokenType.R_PAREN, expression=expression): 6617 self.raise_error("Expecting )") 6618 6619 def _match_texts(self, texts, advance=True): 6620 if self._curr and self._curr.text.upper() in texts: 6621 if advance: 6622 self._advance() 6623 return True 6624 return None 6625 6626 def _match_text_seq(self, *texts, advance=True): 6627 index = self._index 6628 for text in texts: 6629 if self._curr and self._curr.text.upper() == text: 6630 self._advance() 6631 else: 6632 
self._retreat(index) 6633 return None 6634 6635 if not advance: 6636 self._retreat(index) 6637 6638 return True 6639 6640 def _replace_lambda( 6641 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6642 ) -> t.Optional[exp.Expression]: 6643 if not node: 6644 return node 6645 6646 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6647 6648 for column in node.find_all(exp.Column): 6649 typ = lambda_types.get(column.parts[0].name) 6650 if typ is not None: 6651 dot_or_id = column.to_dot() if column.table else column.this 6652 6653 if typ: 6654 dot_or_id = self.expression( 6655 exp.Cast, 6656 this=dot_or_id, 6657 to=typ, 6658 ) 6659 6660 parent = column.parent 6661 6662 while isinstance(parent, exp.Dot): 6663 if not isinstance(parent.parent, exp.Dot): 6664 parent.replace(dot_or_id) 6665 break 6666 parent = parent.parent 6667 else: 6668 if column is node: 6669 node = dot_or_id 6670 else: 6671 column.replace(dot_or_id) 6672 return node 6673 6674 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6675 start = self._prev 6676 6677 # Not to be confused with TRUNCATE(number, decimals) function call 6678 if self._match(TokenType.L_PAREN): 6679 self._retreat(self._index - 2) 6680 return self._parse_function() 6681 6682 # Clickhouse supports TRUNCATE DATABASE as well 6683 is_database = self._match(TokenType.DATABASE) 6684 6685 self._match(TokenType.TABLE) 6686 6687 exists = self._parse_exists(not_=False) 6688 6689 expressions = self._parse_csv( 6690 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6691 ) 6692 6693 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6694 6695 if self._match_text_seq("RESTART", "IDENTITY"): 6696 identity = "RESTART" 6697 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6698 identity = "CONTINUE" 6699 else: 6700 identity = None 6701 6702 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6703 option = self._prev.text 6704 else: 6705 option = None 6706 6707 partition = self._parse_partition() 6708 6709 # Fallback case 6710 if self._curr: 6711 return self._parse_as_command(start) 6712 6713 return self.expression( 6714 exp.TruncateTable, 6715 expressions=expressions, 6716 is_database=is_database, 6717 exists=exists, 6718 cluster=cluster, 6719 identity=identity, 6720 option=option, 6721 partition=partition, 6722 ) 6723 6724 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6725 this = self._parse_ordered(self._parse_opclass) 6726 6727 if not self._match(TokenType.WITH): 6728 return this 6729 6730 op = self._parse_var(any_token=True) 6731 6732 return self.expression(exp.WithOperator, this=this, op=op) 6733 6734 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6735 self._match(TokenType.EQ) 6736 self._match(TokenType.L_PAREN) 6737 6738 opts: t.List[t.Optional[exp.Expression]] = [] 6739 while self._curr and not self._match(TokenType.R_PAREN): 6740 if self._match_text_seq("FORMAT_NAME", "="): 6741 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6742 # so we parse it separately to use _parse_field() 6743 prop = self.expression( 6744 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6745 ) 6746 opts.append(prop) 6747 else: 6748 opts.append(self._parse_property()) 6749 6750 self._match(TokenType.COMMA) 6751 6752 return opts 6753 6754 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6755 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6756 6757 options = [] 6758 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6759 option = self._parse_var(any_token=True) 6760 prev = self._prev.text.upper() 6761 6762 # Different dialects might separate options and values by white space, "=" and "AS" 6763 self._match(TokenType.EQ) 6764 self._match(TokenType.ALIAS) 6765 6766 param = self.expression(exp.CopyParameter, this=option) 6767 6768 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6769 TokenType.L_PAREN, advance=False 6770 ): 6771 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6772 param.set("expressions", self._parse_wrapped_options()) 6773 elif prev == "FILE_FORMAT": 6774 # T-SQL's external file format case 6775 param.set("expression", self._parse_field()) 6776 else: 6777 param.set("expression", self._parse_unquoted_field()) 6778 6779 options.append(param) 6780 self._match(sep) 6781 6782 return options 6783 6784 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6785 expr = self.expression(exp.Credentials) 6786 6787 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6788 expr.set("storage", self._parse_field()) 6789 if self._match_text_seq("CREDENTIALS"): 6790 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6791 creds = ( 6792 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6793 ) 6794 expr.set("credentials", creds) 6795 if self._match_text_seq("ENCRYPTION"): 6796 expr.set("encryption", self._parse_wrapped_options()) 6797 if self._match_text_seq("IAM_ROLE"): 6798 expr.set("iam_role", self._parse_field()) 6799 if self._match_text_seq("REGION"): 6800 expr.set("region", self._parse_field()) 6801 6802 return expr 6803 6804 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6805 return self._parse_field() 6806 6807 def _parse_copy(self) -> exp.Copy | exp.Command: 6808 start = self._prev 6809 6810 self._match(TokenType.INTO) 6811 6812 this = ( 6813 self._parse_select(nested=True, parse_subquery_alias=False) 6814 if self._match(TokenType.L_PAREN, advance=False) 6815 else self._parse_table(schema=True) 6816 ) 6817 6818 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6819 6820 files = self._parse_csv(self._parse_file_location) 6821 credentials = self._parse_credentials() 6822 6823 self._match_text_seq("WITH") 6824 6825 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6826 6827 # Fallback case 6828 if self._curr: 6829 return self._parse_as_command(start) 6830 6831 return self.expression( 6832 exp.Copy, 6833 this=this, 6834 kind=kind, 6835 credentials=credentials, 6836 files=files, 6837 params=params, 6838 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
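A minimal construction sketch; dialect strings such as "duckdb" are resolved by Dialect.get_or_raise, and most callers go through sqlglot.parse or sqlglot.parse_one rather than instantiating Parser directly:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Resolves the dialect and calls reset() to initialize the token state.
    parser = Parser(dialect="duckdb", error_level=ErrorLevel.RAISE, max_errors=5)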
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
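For illustration, a round trip through the tokenizer and the parser (a sketch; the tokenizer should come from the same dialect as the parser so dialect-specific tokens line up):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)
    expressions = Parser().parse(tokens, sql)  # one syntax tree per statement
    print(expressions[0].sql())                # SELECT a FROM t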
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
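A sketch of parsing directly into a target type; this assumes the type is registered in EXPRESSION_PARSERS (exp.Select is among the default entries), otherwise a TypeError is raised:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(select, exp.Select)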
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
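check_errors is invoked internally at the end of _parse, so the error level chosen at construction time decides whether problems are logged or raised. A sketch (with ErrorLevel.WARN the errors are logged via the module-level sqlglot logger instead of being raised):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT CAST(a AS) FROM t"  # missing type triggers "Expected TYPE after CAST"
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)
    print(parser.errors)  # the recorded ParseErrors (logged, not raised)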
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
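With the default ErrorLevel.IMMEDIATE, raise_error raises as soon as a rule fails, and the ParseError carries the location plus the highlighted query context. A sketch reusing the CAST rule from above:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT CAST(a AS) FROM t"
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        info = e.errors[0]  # dict with description, line, col, highlight, ...
        print(info["description"], info["line"], info["col"])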
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
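expression is the builder used throughout the parse rules above; it attaches any pending comments and validates the node in one step. A sketch building a node directly:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    cast = parser.expression(
        exp.Cast,
        this=exp.column("a"),
        to=exp.DataType.build("TEXT"),
    )
    print(cast.sql())  # CAST(a AS TEXT)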
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
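Since validation is skipped only at ErrorLevel.IGNORE, a node missing a mandatory argument is reported through raise_error. A sketch (exp.Cast declares its target type as required):

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    incomplete = exp.Cast(this=exp.column("a"))  # mandatory `to` argument not set
    try:
        Parser().validate_expression(incomplete)
    except ParseError as e:
        print(e.errors[0]["description"])  # e.g. "Required keyword: 'to' missing for ..."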