sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 build_default_decimal_type, 11 build_timestamp_from_parts, 12 date_delta_sql, 13 date_trunc_to_time, 14 datestrtodate_sql, 15 build_formatted_time, 16 if_sql, 17 inline_array_sql, 18 max_or_greatest, 19 min_or_least, 20 rename_func, 21 timestamptrunc_sql, 22 timestrtotime_sql, 23 var_map_sql, 24) 25from sqlglot.helper import flatten, is_float, is_int, seq_get 26from sqlglot.tokens import TokenType 27 28if t.TYPE_CHECKING: 29 from sqlglot._typing import E 30 31 32# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 33def _build_datetime( 34 name: str, kind: exp.DataType.Type, safe: bool = False 35) -> t.Callable[[t.List], exp.Func]: 36 def _builder(args: t.List) -> exp.Func: 37 value = seq_get(args, 0) 38 int_value = value is not None and is_int(value.name) 39 40 if isinstance(value, exp.Literal): 41 # Converts calls like `TO_TIME('01:02:03')` into casts 42 if len(args) == 1 and value.is_string and not int_value: 43 return exp.cast(value, kind) 44 45 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 46 # cases so we can transpile them, since they're relatively common 47 if kind == exp.DataType.Type.TIMESTAMP: 48 if int_value: 49 return exp.UnixToTime(this=value, scale=seq_get(args, 1)) 50 if not is_float(value.this): 51 return build_formatted_time(exp.StrToTime, "snowflake")(args) 52 53 if kind == exp.DataType.Type.DATE and not int_value: 54 formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args) 55 formatted_exp.set("safe", safe) 56 return formatted_exp 57 58 return exp.Anonymous(this=name, expressions=args) 59 60 return _builder 61 62 63def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 64 expression = parser.build_var_map(args) 65 66 if isinstance(expression, exp.StarMap): 67 return expression 68 69 return exp.Struct( 70 expressions=[ 71 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 72 ] 73 ) 74 75 76def _build_datediff(args: t.List) -> exp.DateDiff: 77 return exp.DateDiff( 78 this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0)) 79 ) 80 81 82def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 83 def _builder(args: t.List) -> E: 84 return expr_type( 85 this=seq_get(args, 2), 86 expression=seq_get(args, 1), 87 unit=_map_date_part(seq_get(args, 0)), 88 ) 89 90 return _builder 91 92 93# https://docs.snowflake.com/en/sql-reference/functions/div0 94def _build_if_from_div0(args: t.List) -> exp.If: 95 cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)) 96 true = exp.Literal.number(0) 97 false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1)) 98 return exp.If(this=cond, true=true, false=false) 99 100 101# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 102def _build_if_from_zeroifnull(args: t.List) -> exp.If: 103 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 104 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 105 106 107# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 108def _build_if_from_nullifzero(args: t.List) -> exp.If: 109 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 110 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 111 112 113def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 114 flag = expression.text("flag") 115 116 if "i" not in flag: 117 flag += "i" 118 119 return self.func( 120 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 121 ) 122 123 124def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]: 125 if len(args) == 3: 126 return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args) 127 return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0)) 128 129 130def _build_regexp_replace(args: t.List) -> exp.RegexpReplace: 131 regexp_replace = exp.RegexpReplace.from_arg_list(args) 132 133 if not regexp_replace.args.get("replacement"): 134 regexp_replace.set("replacement", exp.Literal.string("")) 135 136 return regexp_replace 137 138 139def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 140 def _parse(self: Snowflake.Parser) -> exp.Show: 141 return self._parse_show_snowflake(*args, **kwargs) 142 143 return _parse 144 145 146DATE_PART_MAPPING = { 147 "Y": "YEAR", 148 "YY": "YEAR", 149 "YYY": "YEAR", 150 "YYYY": "YEAR", 151 "YR": "YEAR", 152 "YEARS": "YEAR", 153 "YRS": "YEAR", 154 "MM": "MONTH", 155 "MON": "MONTH", 156 "MONS": "MONTH", 157 "MONTHS": "MONTH", 158 "D": "DAY", 159 "DD": "DAY", 160 "DAYS": "DAY", 161 "DAYOFMONTH": "DAY", 162 "WEEKDAY": "DAYOFWEEK", 163 "DOW": "DAYOFWEEK", 164 "DW": "DAYOFWEEK", 165 "WEEKDAY_ISO": "DAYOFWEEKISO", 166 "DOW_ISO": "DAYOFWEEKISO", 167 "DW_ISO": "DAYOFWEEKISO", 168 "YEARDAY": "DAYOFYEAR", 169 "DOY": "DAYOFYEAR", 170 "DY": "DAYOFYEAR", 171 "W": "WEEK", 172 "WK": "WEEK", 173 "WEEKOFYEAR": "WEEK", 174 "WOY": "WEEK", 175 "WY": "WEEK", 176 "WEEK_ISO": "WEEKISO", 177 "WEEKOFYEARISO": "WEEKISO", 178 "WEEKOFYEAR_ISO": "WEEKISO", 179 "Q": "QUARTER", 180 "QTR": "QUARTER", 181 "QTRS": "QUARTER", 182 "QUARTERS": "QUARTER", 183 "H": "HOUR", 184 "HH": "HOUR", 185 "HR": "HOUR", 186 "HOURS": "HOUR", 187 "HRS": "HOUR", 188 "M": "MINUTE", 189 "MI": "MINUTE", 190 "MIN": "MINUTE", 191 "MINUTES": "MINUTE", 192 "MINS": "MINUTE", 193 "S": "SECOND", 194 "SEC": "SECOND", 195 "SECONDS": "SECOND", 196 "SECS": "SECOND", 197 "MS": "MILLISECOND", 198 "MSEC": "MILLISECOND", 199 "MILLISECONDS": "MILLISECOND", 200 "US": "MICROSECOND", 201 "USEC": "MICROSECOND", 202 "MICROSECONDS": "MICROSECOND", 203 "NS": "NANOSECOND", 204 "NSEC": "NANOSECOND", 205 "NANOSEC": "NANOSECOND", 206 "NSECOND": "NANOSECOND", 207 "NSECONDS": "NANOSECOND", 208 "NANOSECS": "NANOSECOND", 209 "EPOCH": "EPOCH_SECOND", 210 "EPOCH_SECONDS": "EPOCH_SECOND", 211 "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND", 212 "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND", 213 "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND", 214 "TZH": "TIMEZONE_HOUR", 215 "TZM": "TIMEZONE_MINUTE", 216} 217 218 219@t.overload 220def _map_date_part(part: exp.Expression) -> exp.Var: 221 pass 222 223 224@t.overload 225def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 226 pass 227 228 229def _map_date_part(part): 230 mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None 231 return exp.var(mapped) if mapped else part 232 233 234def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 235 trunc = date_trunc_to_time(args) 236 trunc.set("unit", _map_date_part(trunc.args["unit"])) 237 return trunc 238 239 240def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression: 241 """ 242 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 243 so we need to unqualify them. 244 245 Example: 246 >>> from sqlglot import parse_one 247 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 248 >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake")) 249 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 250 """ 251 if isinstance(expression, exp.Pivot) and expression.unpivot: 252 expression = transforms.unqualify_columns(expression) 253 254 return expression 255 256 257def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 258 assert isinstance(expression, exp.Create) 259 260 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 261 if expression.this in exp.DataType.NESTED_TYPES: 262 expression.set("expressions", None) 263 return expression 264 265 props = expression.args.get("properties") 266 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 267 for schema_expression in expression.this.expressions: 268 if isinstance(schema_expression, exp.ColumnDef): 269 column_type = schema_expression.kind 270 if isinstance(column_type, exp.DataType): 271 column_type.transform(_flatten_structured_type, copy=False) 272 273 return expression 274 275 276class Snowflake(Dialect): 277 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 278 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 279 NULL_ORDERING = "nulls_are_large" 280 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 281 SUPPORTS_USER_DEFINED_TYPES = False 282 SUPPORTS_SEMI_ANTI_JOIN = False 283 PREFER_CTE_ALIAS_COLUMN = True 284 TABLESAMPLE_SIZE_IS_PERCENT = True 285 COPY_PARAMS_ARE_CSV = False 286 287 TIME_MAPPING = { 288 "YYYY": "%Y", 289 "yyyy": "%Y", 290 "YY": "%y", 291 "yy": "%y", 292 "MMMM": "%B", 293 "mmmm": "%B", 294 "MON": "%b", 295 "mon": "%b", 296 "MM": "%m", 297 "mm": "%m", 298 "DD": "%d", 299 "dd": "%-d", 300 "DY": "%a", 301 "dy": "%w", 302 "HH24": "%H", 303 "hh24": "%H", 304 "HH12": "%I", 305 "hh12": "%I", 306 "MI": "%M", 307 "mi": "%M", 308 "SS": "%S", 309 "ss": "%S", 310 "FF": "%f", 311 "ff": "%f", 312 "FF6": "%f", 313 "ff6": "%f", 314 } 315 316 def quote_identifier(self, expression: E, identify: bool = True) -> E: 317 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 318 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 319 if ( 320 isinstance(expression, exp.Identifier) 321 and isinstance(expression.parent, exp.Table) 322 and expression.name.lower() == "dual" 323 ): 324 return expression # type: ignore 325 326 return super().quote_identifier(expression, identify=identify) 327 328 class Parser(parser.Parser): 329 IDENTIFY_PIVOT_STRINGS = True 330 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 331 COLON_IS_JSON_EXTRACT = True 332 333 ID_VAR_TOKENS = { 334 *parser.Parser.ID_VAR_TOKENS, 335 TokenType.MATCH_CONDITION, 336 } 337 338 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 339 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 340 341 FUNCTIONS = { 342 **parser.Parser.FUNCTIONS, 343 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 344 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 345 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 346 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 347 this=seq_get(args, 1), expression=seq_get(args, 0) 348 ), 349 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 350 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 351 start=seq_get(args, 0), 352 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 353 step=seq_get(args, 2), 354 ), 355 "BITXOR": binary_from_function(exp.BitwiseXor), 356 "BIT_XOR": binary_from_function(exp.BitwiseXor), 357 "BOOLXOR": binary_from_function(exp.Xor), 358 "CONVERT_TIMEZONE": _build_convert_timezone, 359 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 360 "DATE_TRUNC": _date_trunc_to_time, 361 "DATEADD": _build_date_time_add(exp.DateAdd), 362 "DATEDIFF": _build_datediff, 363 "DIV0": _build_if_from_div0, 364 "FLATTEN": exp.Explode.from_arg_list, 365 "GET_PATH": lambda args, dialect: exp.JSONExtract( 366 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 367 ), 368 "IFF": exp.If.from_arg_list, 369 "LAST_DAY": lambda args: exp.LastDay( 370 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 371 ), 372 "LISTAGG": exp.GroupConcat.from_arg_list, 373 "MEDIAN": lambda args: exp.PercentileCont( 374 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 375 ), 376 "NULLIFZERO": _build_if_from_nullifzero, 377 "OBJECT_CONSTRUCT": _build_object_construct, 378 "REGEXP_REPLACE": _build_regexp_replace, 379 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 380 "RLIKE": exp.RegexpLike.from_arg_list, 381 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 382 "TIMEADD": _build_date_time_add(exp.TimeAdd), 383 "TIMEDIFF": _build_datediff, 384 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 385 "TIMESTAMPDIFF": _build_datediff, 386 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 387 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 388 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 389 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 390 "TO_NUMBER": lambda args: exp.ToNumber( 391 this=seq_get(args, 0), 392 format=seq_get(args, 1), 393 precision=seq_get(args, 2), 394 scale=seq_get(args, 3), 395 ), 396 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 397 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 398 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 399 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 400 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 401 "TO_VARCHAR": exp.ToChar.from_arg_list, 402 "ZEROIFNULL": _build_if_from_zeroifnull, 403 } 404 405 FUNCTION_PARSERS = { 406 **parser.Parser.FUNCTION_PARSERS, 407 "DATE_PART": lambda self: self._parse_date_part(), 408 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 409 } 410 FUNCTION_PARSERS.pop("TRIM") 411 412 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 413 414 RANGE_PARSERS = { 415 **parser.Parser.RANGE_PARSERS, 416 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 417 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 418 } 419 420 ALTER_PARSERS = { 421 **parser.Parser.ALTER_PARSERS, 422 "UNSET": lambda self: self.expression( 423 exp.Set, 424 tag=self._match_text_seq("TAG"), 425 expressions=self._parse_csv(self._parse_id_var), 426 unset=True, 427 ), 428 "SWAP": lambda self: self._parse_alter_table_swap(), 429 } 430 431 STATEMENT_PARSERS = { 432 **parser.Parser.STATEMENT_PARSERS, 433 TokenType.SHOW: lambda self: self._parse_show(), 434 } 435 436 PROPERTY_PARSERS = { 437 **parser.Parser.PROPERTY_PARSERS, 438 "LOCATION": lambda self: self._parse_location_property(), 439 } 440 441 TYPE_CONVERTERS = { 442 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 443 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 444 } 445 446 SHOW_PARSERS = { 447 "SCHEMAS": _show_parser("SCHEMAS"), 448 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 449 "OBJECTS": _show_parser("OBJECTS"), 450 "TERSE OBJECTS": _show_parser("OBJECTS"), 451 "TABLES": _show_parser("TABLES"), 452 "TERSE TABLES": _show_parser("TABLES"), 453 "VIEWS": _show_parser("VIEWS"), 454 "TERSE VIEWS": _show_parser("VIEWS"), 455 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 456 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 457 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 458 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 459 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 460 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 461 "SEQUENCES": _show_parser("SEQUENCES"), 462 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 463 "COLUMNS": _show_parser("COLUMNS"), 464 "USERS": _show_parser("USERS"), 465 "TERSE USERS": _show_parser("USERS"), 466 } 467 468 CONSTRAINT_PARSERS = { 469 **parser.Parser.CONSTRAINT_PARSERS, 470 "WITH": lambda self: self._parse_with_constraint(), 471 "MASKING": lambda self: self._parse_with_constraint(), 472 "PROJECTION": lambda self: self._parse_with_constraint(), 473 "TAG": lambda self: self._parse_with_constraint(), 474 } 475 476 STAGED_FILE_SINGLE_TOKENS = { 477 TokenType.DOT, 478 TokenType.MOD, 479 TokenType.SLASH, 480 } 481 482 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 483 484 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 485 486 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 487 488 LAMBDAS = { 489 **parser.Parser.LAMBDAS, 490 TokenType.ARROW: lambda self, expressions: self.expression( 491 exp.Lambda, 492 this=self._replace_lambda( 493 self._parse_assignment(), 494 expressions, 495 ), 496 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 497 ), 498 } 499 500 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 501 if self._prev.token_type != TokenType.WITH: 502 self._retreat(self._index - 1) 503 504 if self._match_text_seq("MASKING", "POLICY"): 505 policy = self._parse_column() 506 return self.expression( 507 exp.MaskingPolicyColumnConstraint, 508 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 509 expressions=self._match(TokenType.USING) 510 and self._parse_wrapped_csv(self._parse_id_var), 511 ) 512 if self._match_text_seq("PROJECTION", "POLICY"): 513 policy = self._parse_column() 514 return self.expression( 515 exp.ProjectionPolicyColumnConstraint, 516 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 517 ) 518 if self._match(TokenType.TAG): 519 return self.expression( 520 exp.TagColumnConstraint, 521 expressions=self._parse_wrapped_csv(self._parse_property), 522 ) 523 524 return None 525 526 def _parse_create(self) -> exp.Create | exp.Command: 527 expression = super()._parse_create() 528 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 529 # Replace the Table node with the enclosed Identifier 530 expression.this.replace(expression.this.this) 531 532 return expression 533 534 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 535 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 536 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 537 this = self._parse_var() or self._parse_type() 538 539 if not this: 540 return None 541 542 self._match(TokenType.COMMA) 543 expression = self._parse_bitwise() 544 this = _map_date_part(this) 545 name = this.name.upper() 546 547 if name.startswith("EPOCH"): 548 if name == "EPOCH_MILLISECOND": 549 scale = 10**3 550 elif name == "EPOCH_MICROSECOND": 551 scale = 10**6 552 elif name == "EPOCH_NANOSECOND": 553 scale = 10**9 554 else: 555 scale = None 556 557 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 558 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 559 560 if scale: 561 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 562 563 return to_unix 564 565 return self.expression(exp.Extract, this=this, expression=expression) 566 567 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 568 if is_map: 569 # Keys are strings in Snowflake's objects, see also: 570 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 571 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 572 return self._parse_slice(self._parse_string()) 573 574 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 575 576 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 577 lateral = super()._parse_lateral() 578 if not lateral: 579 return lateral 580 581 if isinstance(lateral.this, exp.Explode): 582 table_alias = lateral.args.get("alias") 583 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 584 if table_alias and not table_alias.args.get("columns"): 585 table_alias.set("columns", columns) 586 elif not table_alias: 587 exp.alias_(lateral, "_flattened", table=columns, copy=False) 588 589 return lateral 590 591 def _parse_at_before(self, table: exp.Table) -> exp.Table: 592 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 593 index = self._index 594 if self._match_texts(("AT", "BEFORE")): 595 this = self._prev.text.upper() 596 kind = ( 597 self._match(TokenType.L_PAREN) 598 and self._match_texts(self.HISTORICAL_DATA_KIND) 599 and self._prev.text.upper() 600 ) 601 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 602 603 if expression: 604 self._match_r_paren() 605 when = self.expression( 606 exp.HistoricalData, this=this, kind=kind, expression=expression 607 ) 608 table.set("when", when) 609 else: 610 self._retreat(index) 611 612 return table 613 614 def _parse_table_parts( 615 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 616 ) -> exp.Table: 617 # https://docs.snowflake.com/en/user-guide/querying-stage 618 if self._match(TokenType.STRING, advance=False): 619 table = self._parse_string() 620 elif self._match_text_seq("@", advance=False): 621 table = self._parse_location_path() 622 else: 623 table = None 624 625 if table: 626 file_format = None 627 pattern = None 628 629 wrapped = self._match(TokenType.L_PAREN) 630 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 631 if self._match_text_seq("FILE_FORMAT", "=>"): 632 file_format = self._parse_string() or super()._parse_table_parts( 633 is_db_reference=is_db_reference 634 ) 635 elif self._match_text_seq("PATTERN", "=>"): 636 pattern = self._parse_string() 637 else: 638 break 639 640 self._match(TokenType.COMMA) 641 642 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 643 else: 644 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 645 646 return self._parse_at_before(table) 647 648 def _parse_id_var( 649 self, 650 any_token: bool = True, 651 tokens: t.Optional[t.Collection[TokenType]] = None, 652 ) -> t.Optional[exp.Expression]: 653 if self._match_text_seq("IDENTIFIER", "("): 654 identifier = ( 655 super()._parse_id_var(any_token=any_token, tokens=tokens) 656 or self._parse_string() 657 ) 658 self._match_r_paren() 659 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 660 661 return super()._parse_id_var(any_token=any_token, tokens=tokens) 662 663 def _parse_show_snowflake(self, this: str) -> exp.Show: 664 scope = None 665 scope_kind = None 666 667 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 668 # which is syntactically valid but has no effect on the output 669 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 670 671 history = self._match_text_seq("HISTORY") 672 673 like = self._parse_string() if self._match(TokenType.LIKE) else None 674 675 if self._match(TokenType.IN): 676 if self._match_text_seq("ACCOUNT"): 677 scope_kind = "ACCOUNT" 678 elif self._match_set(self.DB_CREATABLES): 679 scope_kind = self._prev.text.upper() 680 if self._curr: 681 scope = self._parse_table_parts() 682 elif self._curr: 683 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 684 scope = self._parse_table_parts() 685 686 return self.expression( 687 exp.Show, 688 **{ 689 "terse": terse, 690 "this": this, 691 "history": history, 692 "like": like, 693 "scope": scope, 694 "scope_kind": scope_kind, 695 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 696 "limit": self._parse_limit(), 697 "from": self._parse_string() if self._match(TokenType.FROM) else None, 698 }, 699 ) 700 701 def _parse_alter_table_swap(self) -> exp.SwapTable: 702 self._match_text_seq("WITH") 703 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 704 705 def _parse_location_property(self) -> exp.LocationProperty: 706 self._match(TokenType.EQ) 707 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 708 709 def _parse_file_location(self) -> t.Optional[exp.Expression]: 710 # Parse either a subquery or a staged file 711 return ( 712 self._parse_select(table=True, parse_subquery_alias=False) 713 if self._match(TokenType.L_PAREN, advance=False) 714 else self._parse_table_parts() 715 ) 716 717 def _parse_location_path(self) -> exp.Var: 718 parts = [self._advance_any(ignore_reserved=True)] 719 720 # We avoid consuming a comma token because external tables like @foo and @bar 721 # can be joined in a query with a comma separator, as well as closing paren 722 # in case of subqueries 723 while self._is_connected() and not self._match_set( 724 (TokenType.COMMA, TokenType.R_PAREN), advance=False 725 ): 726 parts.append(self._advance_any(ignore_reserved=True)) 727 728 return exp.var("".join(part.text for part in parts if part)) 729 730 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 731 this = super()._parse_lambda_arg() 732 733 if not this: 734 return this 735 736 typ = self._parse_types() 737 738 if typ: 739 return self.expression(exp.Cast, this=this, to=typ) 740 741 return this 742 743 class Tokenizer(tokens.Tokenizer): 744 STRING_ESCAPES = ["\\", "'"] 745 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 746 RAW_STRINGS = ["$$"] 747 COMMENTS = ["--", "//", ("/*", "*/")] 748 749 KEYWORDS = { 750 **tokens.Tokenizer.KEYWORDS, 751 "BYTEINT": TokenType.INT, 752 "CHAR VARYING": TokenType.VARCHAR, 753 "CHARACTER VARYING": TokenType.VARCHAR, 754 "EXCLUDE": TokenType.EXCEPT, 755 "ILIKE ANY": TokenType.ILIKE_ANY, 756 "LIKE ANY": TokenType.LIKE_ANY, 757 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 758 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 759 "MINUS": TokenType.EXCEPT, 760 "NCHAR VARYING": TokenType.VARCHAR, 761 "PUT": TokenType.COMMAND, 762 "REMOVE": TokenType.COMMAND, 763 "RM": TokenType.COMMAND, 764 "SAMPLE": TokenType.TABLE_SAMPLE, 765 "SQL_DOUBLE": TokenType.DOUBLE, 766 "SQL_VARCHAR": TokenType.VARCHAR, 767 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 768 "TAG": TokenType.TAG, 769 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 770 "TOP": TokenType.TOP, 771 "WAREHOUSE": TokenType.WAREHOUSE, 772 "STREAMLIT": TokenType.STREAMLIT, 773 } 774 775 SINGLE_TOKENS = { 776 **tokens.Tokenizer.SINGLE_TOKENS, 777 "$": TokenType.PARAMETER, 778 } 779 780 VAR_SINGLE_TOKENS = {"$"} 781 782 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 783 784 class Generator(generator.Generator): 785 PARAMETER_TOKEN = "$" 786 MATCHED_BY_SOURCE = False 787 SINGLE_STRING_INTERVAL = True 788 JOIN_HINTS = False 789 TABLE_HINTS = False 790 QUERY_HINTS = False 791 AGGREGATE_FILTER_SUPPORTED = False 792 SUPPORTS_TABLE_COPY = False 793 COLLATE_IS_FUNC = True 794 LIMIT_ONLY_LITERALS = True 795 JSON_KEY_VALUE_PAIR_SEP = "," 796 INSERT_OVERWRITE = " OVERWRITE INTO" 797 STRUCT_DELIMITER = ("(", ")") 798 COPY_PARAMS_ARE_WRAPPED = False 799 COPY_PARAMS_EQ_REQUIRED = True 800 STAR_EXCEPT = "EXCLUDE" 801 802 TRANSFORMS = { 803 **generator.Generator.TRANSFORMS, 804 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 805 exp.ArgMax: rename_func("MAX_BY"), 806 exp.ArgMin: rename_func("MIN_BY"), 807 exp.Array: inline_array_sql, 808 exp.ArrayConcat: rename_func("ARRAY_CAT"), 809 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 810 exp.AtTimeZone: lambda self, e: self.func( 811 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 812 ), 813 exp.BitwiseXor: rename_func("BITXOR"), 814 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 815 exp.DateAdd: date_delta_sql("DATEADD"), 816 exp.DateDiff: date_delta_sql("DATEDIFF"), 817 exp.DateStrToDate: datestrtodate_sql, 818 exp.DayOfMonth: rename_func("DAYOFMONTH"), 819 exp.DayOfWeek: rename_func("DAYOFWEEK"), 820 exp.DayOfYear: rename_func("DAYOFYEAR"), 821 exp.Explode: rename_func("FLATTEN"), 822 exp.Extract: rename_func("DATE_PART"), 823 exp.FromTimeZone: lambda self, e: self.func( 824 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 825 ), 826 exp.GenerateSeries: lambda self, e: self.func( 827 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 828 ), 829 exp.GroupConcat: rename_func("LISTAGG"), 830 exp.If: if_sql(name="IFF", false_value="NULL"), 831 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 832 exp.JSONExtractScalar: lambda self, e: self.func( 833 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 834 ), 835 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 836 exp.JSONPathRoot: lambda *_: "", 837 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 838 exp.LogicalOr: rename_func("BOOLOR_AGG"), 839 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 840 exp.Max: max_or_greatest, 841 exp.Min: min_or_least, 842 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 843 exp.PercentileCont: transforms.preprocess( 844 [transforms.add_within_group_for_percentiles] 845 ), 846 exp.PercentileDisc: transforms.preprocess( 847 [transforms.add_within_group_for_percentiles] 848 ), 849 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 850 exp.RegexpILike: _regexpilike_sql, 851 exp.Rand: rename_func("RANDOM"), 852 exp.Select: transforms.preprocess( 853 [ 854 transforms.eliminate_distinct_on, 855 transforms.explode_to_unnest(), 856 transforms.eliminate_semi_and_anti_joins, 857 ] 858 ), 859 exp.SHA: rename_func("SHA1"), 860 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 861 exp.StartsWith: rename_func("STARTSWITH"), 862 exp.StrPosition: lambda self, e: self.func( 863 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 864 ), 865 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 866 exp.Stuff: rename_func("INSERT"), 867 exp.TimeAdd: date_delta_sql("TIMEADD"), 868 exp.TimestampDiff: lambda self, e: self.func( 869 "TIMESTAMPDIFF", e.unit, e.expression, e.this 870 ), 871 exp.TimestampTrunc: timestamptrunc_sql(), 872 exp.TimeStrToTime: timestrtotime_sql, 873 exp.TimeToStr: lambda self, e: self.func( 874 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 875 ), 876 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 877 exp.ToArray: rename_func("TO_ARRAY"), 878 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 879 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 880 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 881 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 882 exp.TsOrDsToDate: lambda self, e: self.func( 883 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 884 ), 885 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 886 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 887 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 888 exp.Xor: rename_func("BOOLXOR"), 889 } 890 891 SUPPORTED_JSON_PATH_PARTS = { 892 exp.JSONPathKey, 893 exp.JSONPathRoot, 894 exp.JSONPathSubscript, 895 } 896 897 TYPE_MAPPING = { 898 **generator.Generator.TYPE_MAPPING, 899 exp.DataType.Type.NESTED: "OBJECT", 900 exp.DataType.Type.STRUCT: "OBJECT", 901 } 902 903 PROPERTIES_LOCATION = { 904 **generator.Generator.PROPERTIES_LOCATION, 905 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 906 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 907 } 908 909 UNSUPPORTED_VALUES_EXPRESSIONS = { 910 exp.Map, 911 exp.StarMap, 912 exp.Struct, 913 exp.VarMap, 914 } 915 916 def with_properties(self, properties: exp.Properties) -> str: 917 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 918 919 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 920 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 921 values_as_table = False 922 923 return super().values_sql(expression, values_as_table=values_as_table) 924 925 def datatype_sql(self, expression: exp.DataType) -> str: 926 expressions = expression.expressions 927 if ( 928 expressions 929 and expression.is_type(*exp.DataType.STRUCT_TYPES) 930 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 931 ): 932 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 933 return "OBJECT" 934 935 return super().datatype_sql(expression) 936 937 def tonumber_sql(self, expression: exp.ToNumber) -> str: 938 return self.func( 939 "TO_NUMBER", 940 expression.this, 941 expression.args.get("format"), 942 expression.args.get("precision"), 943 expression.args.get("scale"), 944 ) 945 946 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 947 milli = expression.args.get("milli") 948 if milli is not None: 949 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 950 expression.set("nano", milli_to_nano) 951 952 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 953 954 def trycast_sql(self, expression: exp.TryCast) -> str: 955 value = expression.this 956 957 if value.type is None: 958 from sqlglot.optimizer.annotate_types import annotate_types 959 960 value = annotate_types(value) 961 962 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 963 return super().trycast_sql(expression) 964 965 # TRY_CAST only works for string values in Snowflake 966 return self.cast_sql(expression) 967 968 def log_sql(self, expression: exp.Log) -> str: 969 if not expression.expression: 970 return self.func("LN", expression.this) 971 972 return super().log_sql(expression) 973 974 def unnest_sql(self, expression: exp.Unnest) -> str: 975 unnest_alias = expression.args.get("alias") 976 offset = expression.args.get("offset") 977 978 columns = [ 979 exp.to_identifier("seq"), 980 exp.to_identifier("key"), 981 exp.to_identifier("path"), 982 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 983 seq_get(unnest_alias.columns if unnest_alias else [], 0) 984 or exp.to_identifier("value"), 985 exp.to_identifier("this"), 986 ] 987 988 if unnest_alias: 989 unnest_alias.set("columns", columns) 990 else: 991 unnest_alias = exp.TableAlias(this="_u", columns=columns) 992 993 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 994 alias = self.sql(unnest_alias) 995 alias = f" AS {alias}" if alias else "" 996 return f"{explode}{alias}" 997 998 def show_sql(self, expression: exp.Show) -> str: 999 terse = "TERSE " if expression.args.get("terse") else "" 1000 history = " HISTORY" if expression.args.get("history") else "" 1001 like = self.sql(expression, "like") 1002 like = f" LIKE {like}" if like else "" 1003 1004 scope = self.sql(expression, "scope") 1005 scope = f" {scope}" if scope else "" 1006 1007 scope_kind = self.sql(expression, "scope_kind") 1008 if scope_kind: 1009 scope_kind = f" IN {scope_kind}" 1010 1011 starts_with = self.sql(expression, "starts_with") 1012 if starts_with: 1013 starts_with = f" STARTS WITH {starts_with}" 1014 1015 limit = self.sql(expression, "limit") 1016 1017 from_ = self.sql(expression, "from") 1018 if from_: 1019 from_ = f" FROM {from_}" 1020 1021 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1022 1023 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1024 # Other dialects don't support all of the following parameters, so we need to 1025 # generate default values as necessary to ensure the transpilation is correct 1026 group = expression.args.get("group") 1027 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1028 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1029 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1030 1031 return self.func( 1032 "REGEXP_SUBSTR", 1033 expression.this, 1034 expression.expression, 1035 position, 1036 occurrence, 1037 parameters, 1038 group, 1039 ) 1040 1041 def except_op(self, expression: exp.Except) -> str: 1042 if not expression.args.get("distinct"): 1043 self.unsupported("EXCEPT with All is not supported in Snowflake") 1044 return super().except_op(expression) 1045 1046 def intersect_op(self, expression: exp.Intersect) -> str: 1047 if not expression.args.get("distinct"): 1048 self.unsupported("INTERSECT with All is not supported in Snowflake") 1049 return super().intersect_op(expression) 1050 1051 def describe_sql(self, expression: exp.Describe) -> str: 1052 # Default to table if kind is unknown 1053 kind_value = expression.args.get("kind") or "TABLE" 1054 kind = f" {kind_value}" if kind_value else "" 1055 this = f" {self.sql(expression, 'this')}" 1056 expressions = self.expressions(expression, flat=True) 1057 expressions = f" {expressions}" if expressions else "" 1058 return f"DESCRIBE{kind}{this}{expressions}" 1059 1060 def generatedasidentitycolumnconstraint_sql( 1061 self, expression: exp.GeneratedAsIdentityColumnConstraint 1062 ) -> str: 1063 start = expression.args.get("start") 1064 start = f" START {start}" if start else "" 1065 increment = expression.args.get("increment") 1066 increment = f" INCREMENT {increment}" if increment else "" 1067 return f"AUTOINCREMENT{start}{increment}" 1068 1069 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1070 this = self.sql(expression, "this") 1071 return f"SWAP WITH {this}" 1072 1073 def cluster_sql(self, expression: exp.Cluster) -> str: 1074 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1075 1076 def struct_sql(self, expression: exp.Struct) -> str: 1077 keys = [] 1078 values = [] 1079 1080 for i, e in enumerate(expression.expressions): 1081 if isinstance(e, exp.PropertyEQ): 1082 keys.append( 1083 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1084 ) 1085 values.append(e.expression) 1086 else: 1087 keys.append(exp.Literal.string(f"_{i}")) 1088 values.append(e) 1089 1090 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1091 1092 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1093 if expression.args.get("weight") or expression.args.get("accuracy"): 1094 self.unsupported( 1095 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1096 ) 1097 1098 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1099 1100 def alterset_sql(self, expression: exp.AlterSet) -> str: 1101 exprs = self.expressions(expression, flat=True) 1102 exprs = f" {exprs}" if exprs else "" 1103 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1104 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1105 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1106 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1107 tag = self.expressions(expression, key="tag", flat=True) 1108 tag = f" TAG {tag}" if tag else "" 1109 1110 return f"SET{exprs}{file_format}{copy_options}{tag}"
277class Snowflake(Dialect): 278 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 279 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 280 NULL_ORDERING = "nulls_are_large" 281 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 282 SUPPORTS_USER_DEFINED_TYPES = False 283 SUPPORTS_SEMI_ANTI_JOIN = False 284 PREFER_CTE_ALIAS_COLUMN = True 285 TABLESAMPLE_SIZE_IS_PERCENT = True 286 COPY_PARAMS_ARE_CSV = False 287 288 TIME_MAPPING = { 289 "YYYY": "%Y", 290 "yyyy": "%Y", 291 "YY": "%y", 292 "yy": "%y", 293 "MMMM": "%B", 294 "mmmm": "%B", 295 "MON": "%b", 296 "mon": "%b", 297 "MM": "%m", 298 "mm": "%m", 299 "DD": "%d", 300 "dd": "%-d", 301 "DY": "%a", 302 "dy": "%w", 303 "HH24": "%H", 304 "hh24": "%H", 305 "HH12": "%I", 306 "hh12": "%I", 307 "MI": "%M", 308 "mi": "%M", 309 "SS": "%S", 310 "ss": "%S", 311 "FF": "%f", 312 "ff": "%f", 313 "FF6": "%f", 314 "ff6": "%f", 315 } 316 317 def quote_identifier(self, expression: E, identify: bool = True) -> E: 318 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 319 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 320 if ( 321 isinstance(expression, exp.Identifier) 322 and isinstance(expression.parent, exp.Table) 323 and expression.name.lower() == "dual" 324 ): 325 return expression # type: ignore 326 327 return super().quote_identifier(expression, identify=identify) 328 329 class Parser(parser.Parser): 330 IDENTIFY_PIVOT_STRINGS = True 331 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 332 COLON_IS_JSON_EXTRACT = True 333 334 ID_VAR_TOKENS = { 335 *parser.Parser.ID_VAR_TOKENS, 336 TokenType.MATCH_CONDITION, 337 } 338 339 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 340 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 341 342 FUNCTIONS = { 343 **parser.Parser.FUNCTIONS, 344 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 345 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 346 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 347 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 348 this=seq_get(args, 1), expression=seq_get(args, 0) 349 ), 350 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 351 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 352 start=seq_get(args, 0), 353 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 354 step=seq_get(args, 2), 355 ), 356 "BITXOR": binary_from_function(exp.BitwiseXor), 357 "BIT_XOR": binary_from_function(exp.BitwiseXor), 358 "BOOLXOR": binary_from_function(exp.Xor), 359 "CONVERT_TIMEZONE": _build_convert_timezone, 360 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 361 "DATE_TRUNC": _date_trunc_to_time, 362 "DATEADD": _build_date_time_add(exp.DateAdd), 363 "DATEDIFF": _build_datediff, 364 "DIV0": _build_if_from_div0, 365 "FLATTEN": exp.Explode.from_arg_list, 366 "GET_PATH": lambda args, dialect: exp.JSONExtract( 367 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 368 ), 369 "IFF": exp.If.from_arg_list, 370 "LAST_DAY": lambda args: exp.LastDay( 371 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 372 ), 373 "LISTAGG": exp.GroupConcat.from_arg_list, 374 "MEDIAN": lambda args: exp.PercentileCont( 375 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 376 ), 377 "NULLIFZERO": _build_if_from_nullifzero, 378 "OBJECT_CONSTRUCT": _build_object_construct, 379 "REGEXP_REPLACE": _build_regexp_replace, 380 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 381 "RLIKE": exp.RegexpLike.from_arg_list, 382 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 383 "TIMEADD": _build_date_time_add(exp.TimeAdd), 384 "TIMEDIFF": _build_datediff, 385 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 386 "TIMESTAMPDIFF": _build_datediff, 387 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 388 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 389 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 390 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 391 "TO_NUMBER": lambda args: exp.ToNumber( 392 this=seq_get(args, 0), 393 format=seq_get(args, 1), 394 precision=seq_get(args, 2), 395 scale=seq_get(args, 3), 396 ), 397 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 398 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 399 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 400 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 401 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 402 "TO_VARCHAR": exp.ToChar.from_arg_list, 403 "ZEROIFNULL": _build_if_from_zeroifnull, 404 } 405 406 FUNCTION_PARSERS = { 407 **parser.Parser.FUNCTION_PARSERS, 408 "DATE_PART": lambda self: self._parse_date_part(), 409 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 410 } 411 FUNCTION_PARSERS.pop("TRIM") 412 413 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 414 415 RANGE_PARSERS = { 416 **parser.Parser.RANGE_PARSERS, 417 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 418 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 419 } 420 421 ALTER_PARSERS = { 422 **parser.Parser.ALTER_PARSERS, 423 "UNSET": lambda self: self.expression( 424 exp.Set, 425 tag=self._match_text_seq("TAG"), 426 expressions=self._parse_csv(self._parse_id_var), 427 unset=True, 428 ), 429 "SWAP": lambda self: self._parse_alter_table_swap(), 430 } 431 432 STATEMENT_PARSERS = { 433 **parser.Parser.STATEMENT_PARSERS, 434 TokenType.SHOW: lambda self: self._parse_show(), 435 } 436 437 PROPERTY_PARSERS = { 438 **parser.Parser.PROPERTY_PARSERS, 439 "LOCATION": lambda self: self._parse_location_property(), 440 } 441 442 TYPE_CONVERTERS = { 443 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 444 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 445 } 446 447 SHOW_PARSERS = { 448 "SCHEMAS": _show_parser("SCHEMAS"), 449 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 450 "OBJECTS": _show_parser("OBJECTS"), 451 "TERSE OBJECTS": _show_parser("OBJECTS"), 452 "TABLES": _show_parser("TABLES"), 453 "TERSE TABLES": _show_parser("TABLES"), 454 "VIEWS": _show_parser("VIEWS"), 455 "TERSE VIEWS": _show_parser("VIEWS"), 456 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 457 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 458 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 459 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 460 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 461 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 462 "SEQUENCES": _show_parser("SEQUENCES"), 463 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 464 "COLUMNS": _show_parser("COLUMNS"), 465 "USERS": _show_parser("USERS"), 466 "TERSE USERS": _show_parser("USERS"), 467 } 468 469 CONSTRAINT_PARSERS = { 470 **parser.Parser.CONSTRAINT_PARSERS, 471 "WITH": lambda self: self._parse_with_constraint(), 472 "MASKING": lambda self: self._parse_with_constraint(), 473 "PROJECTION": lambda self: self._parse_with_constraint(), 474 "TAG": lambda self: self._parse_with_constraint(), 475 } 476 477 STAGED_FILE_SINGLE_TOKENS = { 478 TokenType.DOT, 479 TokenType.MOD, 480 TokenType.SLASH, 481 } 482 483 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 484 485 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 486 487 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 488 489 LAMBDAS = { 490 **parser.Parser.LAMBDAS, 491 TokenType.ARROW: lambda self, expressions: self.expression( 492 exp.Lambda, 493 this=self._replace_lambda( 494 self._parse_assignment(), 495 expressions, 496 ), 497 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 498 ), 499 } 500 501 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 502 if self._prev.token_type != TokenType.WITH: 503 self._retreat(self._index - 1) 504 505 if self._match_text_seq("MASKING", "POLICY"): 506 policy = self._parse_column() 507 return self.expression( 508 exp.MaskingPolicyColumnConstraint, 509 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 510 expressions=self._match(TokenType.USING) 511 and self._parse_wrapped_csv(self._parse_id_var), 512 ) 513 if self._match_text_seq("PROJECTION", "POLICY"): 514 policy = self._parse_column() 515 return self.expression( 516 exp.ProjectionPolicyColumnConstraint, 517 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 518 ) 519 if self._match(TokenType.TAG): 520 return self.expression( 521 exp.TagColumnConstraint, 522 expressions=self._parse_wrapped_csv(self._parse_property), 523 ) 524 525 return None 526 527 def _parse_create(self) -> exp.Create | exp.Command: 528 expression = super()._parse_create() 529 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 530 # Replace the Table node with the enclosed Identifier 531 expression.this.replace(expression.this.this) 532 533 return expression 534 535 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 536 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 537 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 538 this = self._parse_var() or self._parse_type() 539 540 if not this: 541 return None 542 543 self._match(TokenType.COMMA) 544 expression = self._parse_bitwise() 545 this = _map_date_part(this) 546 name = this.name.upper() 547 548 if name.startswith("EPOCH"): 549 if name == "EPOCH_MILLISECOND": 550 scale = 10**3 551 elif name == "EPOCH_MICROSECOND": 552 scale = 10**6 553 elif name == "EPOCH_NANOSECOND": 554 scale = 10**9 555 else: 556 scale = None 557 558 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 559 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 560 561 if scale: 562 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 563 564 return to_unix 565 566 return self.expression(exp.Extract, this=this, expression=expression) 567 568 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 569 if is_map: 570 # Keys are strings in Snowflake's objects, see also: 571 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 572 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 573 return self._parse_slice(self._parse_string()) 574 575 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 576 577 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 578 lateral = super()._parse_lateral() 579 if not lateral: 580 return lateral 581 582 if isinstance(lateral.this, exp.Explode): 583 table_alias = lateral.args.get("alias") 584 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 585 if table_alias and not table_alias.args.get("columns"): 586 table_alias.set("columns", columns) 587 elif not table_alias: 588 exp.alias_(lateral, "_flattened", table=columns, copy=False) 589 590 return lateral 591 592 def _parse_at_before(self, table: exp.Table) -> exp.Table: 593 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 594 index = self._index 595 if self._match_texts(("AT", "BEFORE")): 596 this = self._prev.text.upper() 597 kind = ( 598 self._match(TokenType.L_PAREN) 599 and self._match_texts(self.HISTORICAL_DATA_KIND) 600 and self._prev.text.upper() 601 ) 602 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 603 604 if expression: 605 self._match_r_paren() 606 when = self.expression( 607 exp.HistoricalData, this=this, kind=kind, expression=expression 608 ) 609 table.set("when", when) 610 else: 611 self._retreat(index) 612 613 return table 614 615 def _parse_table_parts( 616 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 617 ) -> exp.Table: 618 # https://docs.snowflake.com/en/user-guide/querying-stage 619 if self._match(TokenType.STRING, advance=False): 620 table = self._parse_string() 621 elif self._match_text_seq("@", advance=False): 622 table = self._parse_location_path() 623 else: 624 table = None 625 626 if table: 627 file_format = None 628 pattern = None 629 630 wrapped = self._match(TokenType.L_PAREN) 631 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 632 if self._match_text_seq("FILE_FORMAT", "=>"): 633 file_format = self._parse_string() or super()._parse_table_parts( 634 is_db_reference=is_db_reference 635 ) 636 elif self._match_text_seq("PATTERN", "=>"): 637 pattern = self._parse_string() 638 else: 639 break 640 641 self._match(TokenType.COMMA) 642 643 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 644 else: 645 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 646 647 return self._parse_at_before(table) 648 649 def _parse_id_var( 650 self, 651 any_token: bool = True, 652 tokens: t.Optional[t.Collection[TokenType]] = None, 653 ) -> t.Optional[exp.Expression]: 654 if self._match_text_seq("IDENTIFIER", "("): 655 identifier = ( 656 super()._parse_id_var(any_token=any_token, tokens=tokens) 657 or self._parse_string() 658 ) 659 self._match_r_paren() 660 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 661 662 return super()._parse_id_var(any_token=any_token, tokens=tokens) 663 664 def _parse_show_snowflake(self, this: str) -> exp.Show: 665 scope = None 666 scope_kind = None 667 668 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 669 # which is syntactically valid but has no effect on the output 670 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 671 672 history = self._match_text_seq("HISTORY") 673 674 like = self._parse_string() if self._match(TokenType.LIKE) else None 675 676 if self._match(TokenType.IN): 677 if self._match_text_seq("ACCOUNT"): 678 scope_kind = "ACCOUNT" 679 elif self._match_set(self.DB_CREATABLES): 680 scope_kind = self._prev.text.upper() 681 if self._curr: 682 scope = self._parse_table_parts() 683 elif self._curr: 684 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 685 scope = self._parse_table_parts() 686 687 return self.expression( 688 exp.Show, 689 **{ 690 "terse": terse, 691 "this": this, 692 "history": history, 693 "like": like, 694 "scope": scope, 695 "scope_kind": scope_kind, 696 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 697 "limit": self._parse_limit(), 698 "from": self._parse_string() if self._match(TokenType.FROM) else None, 699 }, 700 ) 701 702 def _parse_alter_table_swap(self) -> exp.SwapTable: 703 self._match_text_seq("WITH") 704 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 705 706 def _parse_location_property(self) -> exp.LocationProperty: 707 self._match(TokenType.EQ) 708 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 709 710 def _parse_file_location(self) -> t.Optional[exp.Expression]: 711 # Parse either a subquery or a staged file 712 return ( 713 self._parse_select(table=True, parse_subquery_alias=False) 714 if self._match(TokenType.L_PAREN, advance=False) 715 else self._parse_table_parts() 716 ) 717 718 def _parse_location_path(self) -> exp.Var: 719 parts = [self._advance_any(ignore_reserved=True)] 720 721 # We avoid consuming a comma token because external tables like @foo and @bar 722 # can be joined in a query with a comma separator, as well as closing paren 723 # in case of subqueries 724 while self._is_connected() and not self._match_set( 725 (TokenType.COMMA, TokenType.R_PAREN), advance=False 726 ): 727 parts.append(self._advance_any(ignore_reserved=True)) 728 729 return exp.var("".join(part.text for part in parts if part)) 730 731 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 732 this = super()._parse_lambda_arg() 733 734 if not this: 735 return this 736 737 typ = self._parse_types() 738 739 if typ: 740 return self.expression(exp.Cast, this=this, to=typ) 741 742 return this 743 744 class Tokenizer(tokens.Tokenizer): 745 STRING_ESCAPES = ["\\", "'"] 746 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 747 RAW_STRINGS = ["$$"] 748 COMMENTS = ["--", "//", ("/*", "*/")] 749 750 KEYWORDS = { 751 **tokens.Tokenizer.KEYWORDS, 752 "BYTEINT": TokenType.INT, 753 "CHAR VARYING": TokenType.VARCHAR, 754 "CHARACTER VARYING": TokenType.VARCHAR, 755 "EXCLUDE": TokenType.EXCEPT, 756 "ILIKE ANY": TokenType.ILIKE_ANY, 757 "LIKE ANY": TokenType.LIKE_ANY, 758 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 759 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 760 "MINUS": TokenType.EXCEPT, 761 "NCHAR VARYING": TokenType.VARCHAR, 762 "PUT": TokenType.COMMAND, 763 "REMOVE": TokenType.COMMAND, 764 "RM": TokenType.COMMAND, 765 "SAMPLE": TokenType.TABLE_SAMPLE, 766 "SQL_DOUBLE": TokenType.DOUBLE, 767 "SQL_VARCHAR": TokenType.VARCHAR, 768 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 769 "TAG": TokenType.TAG, 770 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 771 "TOP": TokenType.TOP, 772 "WAREHOUSE": TokenType.WAREHOUSE, 773 "STREAMLIT": TokenType.STREAMLIT, 774 } 775 776 SINGLE_TOKENS = { 777 **tokens.Tokenizer.SINGLE_TOKENS, 778 "$": TokenType.PARAMETER, 779 } 780 781 VAR_SINGLE_TOKENS = {"$"} 782 783 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 784 785 class Generator(generator.Generator): 786 PARAMETER_TOKEN = "$" 787 MATCHED_BY_SOURCE = False 788 SINGLE_STRING_INTERVAL = True 789 JOIN_HINTS = False 790 TABLE_HINTS = False 791 QUERY_HINTS = False 792 AGGREGATE_FILTER_SUPPORTED = False 793 SUPPORTS_TABLE_COPY = False 794 COLLATE_IS_FUNC = True 795 LIMIT_ONLY_LITERALS = True 796 JSON_KEY_VALUE_PAIR_SEP = "," 797 INSERT_OVERWRITE = " OVERWRITE INTO" 798 STRUCT_DELIMITER = ("(", ")") 799 COPY_PARAMS_ARE_WRAPPED = False 800 COPY_PARAMS_EQ_REQUIRED = True 801 STAR_EXCEPT = "EXCLUDE" 802 803 TRANSFORMS = { 804 **generator.Generator.TRANSFORMS, 805 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 806 exp.ArgMax: rename_func("MAX_BY"), 807 exp.ArgMin: rename_func("MIN_BY"), 808 exp.Array: inline_array_sql, 809 exp.ArrayConcat: rename_func("ARRAY_CAT"), 810 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 811 exp.AtTimeZone: lambda self, e: self.func( 812 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 813 ), 814 exp.BitwiseXor: rename_func("BITXOR"), 815 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 816 exp.DateAdd: date_delta_sql("DATEADD"), 817 exp.DateDiff: date_delta_sql("DATEDIFF"), 818 exp.DateStrToDate: datestrtodate_sql, 819 exp.DayOfMonth: rename_func("DAYOFMONTH"), 820 exp.DayOfWeek: rename_func("DAYOFWEEK"), 821 exp.DayOfYear: rename_func("DAYOFYEAR"), 822 exp.Explode: rename_func("FLATTEN"), 823 exp.Extract: rename_func("DATE_PART"), 824 exp.FromTimeZone: lambda self, e: self.func( 825 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 826 ), 827 exp.GenerateSeries: lambda self, e: self.func( 828 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 829 ), 830 exp.GroupConcat: rename_func("LISTAGG"), 831 exp.If: if_sql(name="IFF", false_value="NULL"), 832 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 833 exp.JSONExtractScalar: lambda self, e: self.func( 834 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 835 ), 836 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 837 exp.JSONPathRoot: lambda *_: "", 838 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 839 exp.LogicalOr: rename_func("BOOLOR_AGG"), 840 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 841 exp.Max: max_or_greatest, 842 exp.Min: min_or_least, 843 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 844 exp.PercentileCont: transforms.preprocess( 845 [transforms.add_within_group_for_percentiles] 846 ), 847 exp.PercentileDisc: transforms.preprocess( 848 [transforms.add_within_group_for_percentiles] 849 ), 850 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 851 exp.RegexpILike: _regexpilike_sql, 852 exp.Rand: rename_func("RANDOM"), 853 exp.Select: transforms.preprocess( 854 [ 855 transforms.eliminate_distinct_on, 856 transforms.explode_to_unnest(), 857 transforms.eliminate_semi_and_anti_joins, 858 ] 859 ), 860 exp.SHA: rename_func("SHA1"), 861 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 862 exp.StartsWith: rename_func("STARTSWITH"), 863 exp.StrPosition: lambda self, e: self.func( 864 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 865 ), 866 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 867 exp.Stuff: rename_func("INSERT"), 868 exp.TimeAdd: date_delta_sql("TIMEADD"), 869 exp.TimestampDiff: lambda self, e: self.func( 870 "TIMESTAMPDIFF", e.unit, e.expression, e.this 871 ), 872 exp.TimestampTrunc: timestamptrunc_sql(), 873 exp.TimeStrToTime: timestrtotime_sql, 874 exp.TimeToStr: lambda self, e: self.func( 875 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 876 ), 877 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 878 exp.ToArray: rename_func("TO_ARRAY"), 879 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 880 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 881 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 882 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 883 exp.TsOrDsToDate: lambda self, e: self.func( 884 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 885 ), 886 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 887 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 888 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 889 exp.Xor: rename_func("BOOLXOR"), 890 } 891 892 SUPPORTED_JSON_PATH_PARTS = { 893 exp.JSONPathKey, 894 exp.JSONPathRoot, 895 exp.JSONPathSubscript, 896 } 897 898 TYPE_MAPPING = { 899 **generator.Generator.TYPE_MAPPING, 900 exp.DataType.Type.NESTED: "OBJECT", 901 exp.DataType.Type.STRUCT: "OBJECT", 902 } 903 904 PROPERTIES_LOCATION = { 905 **generator.Generator.PROPERTIES_LOCATION, 906 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 907 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 908 } 909 910 UNSUPPORTED_VALUES_EXPRESSIONS = { 911 exp.Map, 912 exp.StarMap, 913 exp.Struct, 914 exp.VarMap, 915 } 916 917 def with_properties(self, properties: exp.Properties) -> str: 918 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 919 920 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 921 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 922 values_as_table = False 923 924 return super().values_sql(expression, values_as_table=values_as_table) 925 926 def datatype_sql(self, expression: exp.DataType) -> str: 927 expressions = expression.expressions 928 if ( 929 expressions 930 and expression.is_type(*exp.DataType.STRUCT_TYPES) 931 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 932 ): 933 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 934 return "OBJECT" 935 936 return super().datatype_sql(expression) 937 938 def tonumber_sql(self, expression: exp.ToNumber) -> str: 939 return self.func( 940 "TO_NUMBER", 941 expression.this, 942 expression.args.get("format"), 943 expression.args.get("precision"), 944 expression.args.get("scale"), 945 ) 946 947 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 948 milli = expression.args.get("milli") 949 if milli is not None: 950 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 951 expression.set("nano", milli_to_nano) 952 953 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 954 955 def trycast_sql(self, expression: exp.TryCast) -> str: 956 value = expression.this 957 958 if value.type is None: 959 from sqlglot.optimizer.annotate_types import annotate_types 960 961 value = annotate_types(value) 962 963 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 964 return super().trycast_sql(expression) 965 966 # TRY_CAST only works for string values in Snowflake 967 return self.cast_sql(expression) 968 969 def log_sql(self, expression: exp.Log) -> str: 970 if not expression.expression: 971 return self.func("LN", expression.this) 972 973 return super().log_sql(expression) 974 975 def unnest_sql(self, expression: exp.Unnest) -> str: 976 unnest_alias = expression.args.get("alias") 977 offset = expression.args.get("offset") 978 979 columns = [ 980 exp.to_identifier("seq"), 981 exp.to_identifier("key"), 982 exp.to_identifier("path"), 983 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 984 seq_get(unnest_alias.columns if unnest_alias else [], 0) 985 or exp.to_identifier("value"), 986 exp.to_identifier("this"), 987 ] 988 989 if unnest_alias: 990 unnest_alias.set("columns", columns) 991 else: 992 unnest_alias = exp.TableAlias(this="_u", columns=columns) 993 994 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 995 alias = self.sql(unnest_alias) 996 alias = f" AS {alias}" if alias else "" 997 return f"{explode}{alias}" 998 999 def show_sql(self, expression: exp.Show) -> str: 1000 terse = "TERSE " if expression.args.get("terse") else "" 1001 history = " HISTORY" if expression.args.get("history") else "" 1002 like = self.sql(expression, "like") 1003 like = f" LIKE {like}" if like else "" 1004 1005 scope = self.sql(expression, "scope") 1006 scope = f" {scope}" if scope else "" 1007 1008 scope_kind = self.sql(expression, "scope_kind") 1009 if scope_kind: 1010 scope_kind = f" IN {scope_kind}" 1011 1012 starts_with = self.sql(expression, "starts_with") 1013 if starts_with: 1014 starts_with = f" STARTS WITH {starts_with}" 1015 1016 limit = self.sql(expression, "limit") 1017 1018 from_ = self.sql(expression, "from") 1019 if from_: 1020 from_ = f" FROM {from_}" 1021 1022 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1023 1024 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1025 # Other dialects don't support all of the following parameters, so we need to 1026 # generate default values as necessary to ensure the transpilation is correct 1027 group = expression.args.get("group") 1028 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1029 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1030 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1031 1032 return self.func( 1033 "REGEXP_SUBSTR", 1034 expression.this, 1035 expression.expression, 1036 position, 1037 occurrence, 1038 parameters, 1039 group, 1040 ) 1041 1042 def except_op(self, expression: exp.Except) -> str: 1043 if not expression.args.get("distinct"): 1044 self.unsupported("EXCEPT with All is not supported in Snowflake") 1045 return super().except_op(expression) 1046 1047 def intersect_op(self, expression: exp.Intersect) -> str: 1048 if not expression.args.get("distinct"): 1049 self.unsupported("INTERSECT with All is not supported in Snowflake") 1050 return super().intersect_op(expression) 1051 1052 def describe_sql(self, expression: exp.Describe) -> str: 1053 # Default to table if kind is unknown 1054 kind_value = expression.args.get("kind") or "TABLE" 1055 kind = f" {kind_value}" if kind_value else "" 1056 this = f" {self.sql(expression, 'this')}" 1057 expressions = self.expressions(expression, flat=True) 1058 expressions = f" {expressions}" if expressions else "" 1059 return f"DESCRIBE{kind}{this}{expressions}" 1060 1061 def generatedasidentitycolumnconstraint_sql( 1062 self, expression: exp.GeneratedAsIdentityColumnConstraint 1063 ) -> str: 1064 start = expression.args.get("start") 1065 start = f" START {start}" if start else "" 1066 increment = expression.args.get("increment") 1067 increment = f" INCREMENT {increment}" if increment else "" 1068 return f"AUTOINCREMENT{start}{increment}" 1069 1070 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1071 this = self.sql(expression, "this") 1072 return f"SWAP WITH {this}" 1073 1074 def cluster_sql(self, expression: exp.Cluster) -> str: 1075 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1076 1077 def struct_sql(self, expression: exp.Struct) -> str: 1078 keys = [] 1079 values = [] 1080 1081 for i, e in enumerate(expression.expressions): 1082 if isinstance(e, exp.PropertyEQ): 1083 keys.append( 1084 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1085 ) 1086 values.append(e.expression) 1087 else: 1088 keys.append(exp.Literal.string(f"_{i}")) 1089 values.append(e) 1090 1091 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1092 1093 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1094 if expression.args.get("weight") or expression.args.get("accuracy"): 1095 self.unsupported( 1096 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1097 ) 1098 1099 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1100 1101 def alterset_sql(self, expression: exp.AlterSet) -> str: 1102 exprs = self.expressions(expression, flat=True) 1103 exprs = f" {exprs}" if exprs else "" 1104 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1105 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1106 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1107 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1108 tag = self.expressions(expression, key="tag", flat=True) 1109 tag = f" TAG {tag}" if tag else "" 1110 1111 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
317 def quote_identifier(self, expression: E, identify: bool = True) -> E: 318 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 319 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 320 if ( 321 isinstance(expression, exp.Identifier) 322 and isinstance(expression.parent, exp.Table) 323 and expression.name.lower() == "dual" 324 ): 325 return expression # type: ignore 326 327 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_COLUMN_JOIN_MARKS
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
329 class Parser(parser.Parser): 330 IDENTIFY_PIVOT_STRINGS = True 331 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 332 COLON_IS_JSON_EXTRACT = True 333 334 ID_VAR_TOKENS = { 335 *parser.Parser.ID_VAR_TOKENS, 336 TokenType.MATCH_CONDITION, 337 } 338 339 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 340 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 341 342 FUNCTIONS = { 343 **parser.Parser.FUNCTIONS, 344 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 345 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 346 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 347 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 348 this=seq_get(args, 1), expression=seq_get(args, 0) 349 ), 350 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 351 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 352 start=seq_get(args, 0), 353 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 354 step=seq_get(args, 2), 355 ), 356 "BITXOR": binary_from_function(exp.BitwiseXor), 357 "BIT_XOR": binary_from_function(exp.BitwiseXor), 358 "BOOLXOR": binary_from_function(exp.Xor), 359 "CONVERT_TIMEZONE": _build_convert_timezone, 360 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 361 "DATE_TRUNC": _date_trunc_to_time, 362 "DATEADD": _build_date_time_add(exp.DateAdd), 363 "DATEDIFF": _build_datediff, 364 "DIV0": _build_if_from_div0, 365 "FLATTEN": exp.Explode.from_arg_list, 366 "GET_PATH": lambda args, dialect: exp.JSONExtract( 367 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 368 ), 369 "IFF": exp.If.from_arg_list, 370 "LAST_DAY": lambda args: exp.LastDay( 371 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 372 ), 373 "LISTAGG": exp.GroupConcat.from_arg_list, 374 "MEDIAN": lambda args: exp.PercentileCont( 375 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 376 ), 377 "NULLIFZERO": _build_if_from_nullifzero, 378 "OBJECT_CONSTRUCT": _build_object_construct, 379 "REGEXP_REPLACE": _build_regexp_replace, 380 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 381 "RLIKE": exp.RegexpLike.from_arg_list, 382 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 383 "TIMEADD": _build_date_time_add(exp.TimeAdd), 384 "TIMEDIFF": _build_datediff, 385 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 386 "TIMESTAMPDIFF": _build_datediff, 387 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 388 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 389 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 390 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 391 "TO_NUMBER": lambda args: exp.ToNumber( 392 this=seq_get(args, 0), 393 format=seq_get(args, 1), 394 precision=seq_get(args, 2), 395 scale=seq_get(args, 3), 396 ), 397 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 398 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 399 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 400 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 401 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 402 "TO_VARCHAR": exp.ToChar.from_arg_list, 403 "ZEROIFNULL": _build_if_from_zeroifnull, 404 } 405 406 FUNCTION_PARSERS = { 407 **parser.Parser.FUNCTION_PARSERS, 408 "DATE_PART": lambda self: self._parse_date_part(), 409 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 410 } 411 FUNCTION_PARSERS.pop("TRIM") 412 413 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 414 415 RANGE_PARSERS = { 416 **parser.Parser.RANGE_PARSERS, 417 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 418 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 419 } 420 421 ALTER_PARSERS = { 422 **parser.Parser.ALTER_PARSERS, 423 "UNSET": lambda self: self.expression( 424 exp.Set, 425 tag=self._match_text_seq("TAG"), 426 expressions=self._parse_csv(self._parse_id_var), 427 unset=True, 428 ), 429 "SWAP": lambda self: self._parse_alter_table_swap(), 430 } 431 432 STATEMENT_PARSERS = { 433 **parser.Parser.STATEMENT_PARSERS, 434 TokenType.SHOW: lambda self: self._parse_show(), 435 } 436 437 PROPERTY_PARSERS = { 438 **parser.Parser.PROPERTY_PARSERS, 439 "LOCATION": lambda self: self._parse_location_property(), 440 } 441 442 TYPE_CONVERTERS = { 443 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 444 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 445 } 446 447 SHOW_PARSERS = { 448 "SCHEMAS": _show_parser("SCHEMAS"), 449 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 450 "OBJECTS": _show_parser("OBJECTS"), 451 "TERSE OBJECTS": _show_parser("OBJECTS"), 452 "TABLES": _show_parser("TABLES"), 453 "TERSE TABLES": _show_parser("TABLES"), 454 "VIEWS": _show_parser("VIEWS"), 455 "TERSE VIEWS": _show_parser("VIEWS"), 456 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 457 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 458 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 459 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 460 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 461 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 462 "SEQUENCES": _show_parser("SEQUENCES"), 463 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 464 "COLUMNS": _show_parser("COLUMNS"), 465 "USERS": _show_parser("USERS"), 466 "TERSE USERS": _show_parser("USERS"), 467 } 468 469 CONSTRAINT_PARSERS = { 470 **parser.Parser.CONSTRAINT_PARSERS, 471 "WITH": lambda self: self._parse_with_constraint(), 472 "MASKING": lambda self: self._parse_with_constraint(), 473 "PROJECTION": lambda self: self._parse_with_constraint(), 474 "TAG": lambda self: self._parse_with_constraint(), 475 } 476 477 STAGED_FILE_SINGLE_TOKENS = { 478 TokenType.DOT, 479 TokenType.MOD, 480 TokenType.SLASH, 481 } 482 483 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 484 485 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 486 487 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 488 489 LAMBDAS = { 490 **parser.Parser.LAMBDAS, 491 TokenType.ARROW: lambda self, expressions: self.expression( 492 exp.Lambda, 493 this=self._replace_lambda( 494 self._parse_assignment(), 495 expressions, 496 ), 497 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 498 ), 499 } 500 501 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 502 if self._prev.token_type != TokenType.WITH: 503 self._retreat(self._index - 1) 504 505 if self._match_text_seq("MASKING", "POLICY"): 506 policy = self._parse_column() 507 return self.expression( 508 exp.MaskingPolicyColumnConstraint, 509 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 510 expressions=self._match(TokenType.USING) 511 and self._parse_wrapped_csv(self._parse_id_var), 512 ) 513 if self._match_text_seq("PROJECTION", "POLICY"): 514 policy = self._parse_column() 515 return self.expression( 516 exp.ProjectionPolicyColumnConstraint, 517 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 518 ) 519 if self._match(TokenType.TAG): 520 return self.expression( 521 exp.TagColumnConstraint, 522 expressions=self._parse_wrapped_csv(self._parse_property), 523 ) 524 525 return None 526 527 def _parse_create(self) -> exp.Create | exp.Command: 528 expression = super()._parse_create() 529 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 530 # Replace the Table node with the enclosed Identifier 531 expression.this.replace(expression.this.this) 532 533 return expression 534 535 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 536 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 537 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 538 this = self._parse_var() or self._parse_type() 539 540 if not this: 541 return None 542 543 self._match(TokenType.COMMA) 544 expression = self._parse_bitwise() 545 this = _map_date_part(this) 546 name = this.name.upper() 547 548 if name.startswith("EPOCH"): 549 if name == "EPOCH_MILLISECOND": 550 scale = 10**3 551 elif name == "EPOCH_MICROSECOND": 552 scale = 10**6 553 elif name == "EPOCH_NANOSECOND": 554 scale = 10**9 555 else: 556 scale = None 557 558 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 559 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 560 561 if scale: 562 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 563 564 return to_unix 565 566 return self.expression(exp.Extract, this=this, expression=expression) 567 568 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 569 if is_map: 570 # Keys are strings in Snowflake's objects, see also: 571 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 572 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 573 return self._parse_slice(self._parse_string()) 574 575 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 576 577 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 578 lateral = super()._parse_lateral() 579 if not lateral: 580 return lateral 581 582 if isinstance(lateral.this, exp.Explode): 583 table_alias = lateral.args.get("alias") 584 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 585 if table_alias and not table_alias.args.get("columns"): 586 table_alias.set("columns", columns) 587 elif not table_alias: 588 exp.alias_(lateral, "_flattened", table=columns, copy=False) 589 590 return lateral 591 592 def _parse_at_before(self, table: exp.Table) -> exp.Table: 593 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 594 index = self._index 595 if self._match_texts(("AT", "BEFORE")): 596 this = self._prev.text.upper() 597 kind = ( 598 self._match(TokenType.L_PAREN) 599 and self._match_texts(self.HISTORICAL_DATA_KIND) 600 and self._prev.text.upper() 601 ) 602 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 603 604 if expression: 605 self._match_r_paren() 606 when = self.expression( 607 exp.HistoricalData, this=this, kind=kind, expression=expression 608 ) 609 table.set("when", when) 610 else: 611 self._retreat(index) 612 613 return table 614 615 def _parse_table_parts( 616 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 617 ) -> exp.Table: 618 # https://docs.snowflake.com/en/user-guide/querying-stage 619 if self._match(TokenType.STRING, advance=False): 620 table = self._parse_string() 621 elif self._match_text_seq("@", advance=False): 622 table = self._parse_location_path() 623 else: 624 table = None 625 626 if table: 627 file_format = None 628 pattern = None 629 630 wrapped = self._match(TokenType.L_PAREN) 631 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 632 if self._match_text_seq("FILE_FORMAT", "=>"): 633 file_format = self._parse_string() or super()._parse_table_parts( 634 is_db_reference=is_db_reference 635 ) 636 elif self._match_text_seq("PATTERN", "=>"): 637 pattern = self._parse_string() 638 else: 639 break 640 641 self._match(TokenType.COMMA) 642 643 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 644 else: 645 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 646 647 return self._parse_at_before(table) 648 649 def _parse_id_var( 650 self, 651 any_token: bool = True, 652 tokens: t.Optional[t.Collection[TokenType]] = None, 653 ) -> t.Optional[exp.Expression]: 654 if self._match_text_seq("IDENTIFIER", "("): 655 identifier = ( 656 super()._parse_id_var(any_token=any_token, tokens=tokens) 657 or self._parse_string() 658 ) 659 self._match_r_paren() 660 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 661 662 return super()._parse_id_var(any_token=any_token, tokens=tokens) 663 664 def _parse_show_snowflake(self, this: str) -> exp.Show: 665 scope = None 666 scope_kind = None 667 668 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 669 # which is syntactically valid but has no effect on the output 670 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 671 672 history = self._match_text_seq("HISTORY") 673 674 like = self._parse_string() if self._match(TokenType.LIKE) else None 675 676 if self._match(TokenType.IN): 677 if self._match_text_seq("ACCOUNT"): 678 scope_kind = "ACCOUNT" 679 elif self._match_set(self.DB_CREATABLES): 680 scope_kind = self._prev.text.upper() 681 if self._curr: 682 scope = self._parse_table_parts() 683 elif self._curr: 684 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 685 scope = self._parse_table_parts() 686 687 return self.expression( 688 exp.Show, 689 **{ 690 "terse": terse, 691 "this": this, 692 "history": history, 693 "like": like, 694 "scope": scope, 695 "scope_kind": scope_kind, 696 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 697 "limit": self._parse_limit(), 698 "from": self._parse_string() if self._match(TokenType.FROM) else None, 699 }, 700 ) 701 702 def _parse_alter_table_swap(self) -> exp.SwapTable: 703 self._match_text_seq("WITH") 704 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 705 706 def _parse_location_property(self) -> exp.LocationProperty: 707 self._match(TokenType.EQ) 708 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 709 710 def _parse_file_location(self) -> t.Optional[exp.Expression]: 711 # Parse either a subquery or a staged file 712 return ( 713 self._parse_select(table=True, parse_subquery_alias=False) 714 if self._match(TokenType.L_PAREN, advance=False) 715 else self._parse_table_parts() 716 ) 717 718 def _parse_location_path(self) -> exp.Var: 719 parts = [self._advance_any(ignore_reserved=True)] 720 721 # We avoid consuming a comma token because external tables like @foo and @bar 722 # can be joined in a query with a comma separator, as well as closing paren 723 # in case of subqueries 724 while self._is_connected() and not self._match_set( 725 (TokenType.COMMA, TokenType.R_PAREN), advance=False 726 ): 727 parts.append(self._advance_any(ignore_reserved=True)) 728 729 return exp.var("".join(part.text for part in parts if part)) 730 731 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 732 this = super()._parse_lambda_arg() 733 734 if not this: 735 return this 736 737 typ = self._parse_types() 738 739 if typ: 740 return self.expression(exp.Cast, this=this, to=typ) 741 742 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
744 class Tokenizer(tokens.Tokenizer): 745 STRING_ESCAPES = ["\\", "'"] 746 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 747 RAW_STRINGS = ["$$"] 748 COMMENTS = ["--", "//", ("/*", "*/")] 749 750 KEYWORDS = { 751 **tokens.Tokenizer.KEYWORDS, 752 "BYTEINT": TokenType.INT, 753 "CHAR VARYING": TokenType.VARCHAR, 754 "CHARACTER VARYING": TokenType.VARCHAR, 755 "EXCLUDE": TokenType.EXCEPT, 756 "ILIKE ANY": TokenType.ILIKE_ANY, 757 "LIKE ANY": TokenType.LIKE_ANY, 758 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 759 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 760 "MINUS": TokenType.EXCEPT, 761 "NCHAR VARYING": TokenType.VARCHAR, 762 "PUT": TokenType.COMMAND, 763 "REMOVE": TokenType.COMMAND, 764 "RM": TokenType.COMMAND, 765 "SAMPLE": TokenType.TABLE_SAMPLE, 766 "SQL_DOUBLE": TokenType.DOUBLE, 767 "SQL_VARCHAR": TokenType.VARCHAR, 768 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 769 "TAG": TokenType.TAG, 770 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 771 "TOP": TokenType.TOP, 772 "WAREHOUSE": TokenType.WAREHOUSE, 773 "STREAMLIT": TokenType.STREAMLIT, 774 } 775 776 SINGLE_TOKENS = { 777 **tokens.Tokenizer.SINGLE_TOKENS, 778 "$": TokenType.PARAMETER, 779 } 780 781 VAR_SINGLE_TOKENS = {"$"} 782 783 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
785 class Generator(generator.Generator): 786 PARAMETER_TOKEN = "$" 787 MATCHED_BY_SOURCE = False 788 SINGLE_STRING_INTERVAL = True 789 JOIN_HINTS = False 790 TABLE_HINTS = False 791 QUERY_HINTS = False 792 AGGREGATE_FILTER_SUPPORTED = False 793 SUPPORTS_TABLE_COPY = False 794 COLLATE_IS_FUNC = True 795 LIMIT_ONLY_LITERALS = True 796 JSON_KEY_VALUE_PAIR_SEP = "," 797 INSERT_OVERWRITE = " OVERWRITE INTO" 798 STRUCT_DELIMITER = ("(", ")") 799 COPY_PARAMS_ARE_WRAPPED = False 800 COPY_PARAMS_EQ_REQUIRED = True 801 STAR_EXCEPT = "EXCLUDE" 802 803 TRANSFORMS = { 804 **generator.Generator.TRANSFORMS, 805 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 806 exp.ArgMax: rename_func("MAX_BY"), 807 exp.ArgMin: rename_func("MIN_BY"), 808 exp.Array: inline_array_sql, 809 exp.ArrayConcat: rename_func("ARRAY_CAT"), 810 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 811 exp.AtTimeZone: lambda self, e: self.func( 812 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 813 ), 814 exp.BitwiseXor: rename_func("BITXOR"), 815 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 816 exp.DateAdd: date_delta_sql("DATEADD"), 817 exp.DateDiff: date_delta_sql("DATEDIFF"), 818 exp.DateStrToDate: datestrtodate_sql, 819 exp.DayOfMonth: rename_func("DAYOFMONTH"), 820 exp.DayOfWeek: rename_func("DAYOFWEEK"), 821 exp.DayOfYear: rename_func("DAYOFYEAR"), 822 exp.Explode: rename_func("FLATTEN"), 823 exp.Extract: rename_func("DATE_PART"), 824 exp.FromTimeZone: lambda self, e: self.func( 825 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 826 ), 827 exp.GenerateSeries: lambda self, e: self.func( 828 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 829 ), 830 exp.GroupConcat: rename_func("LISTAGG"), 831 exp.If: if_sql(name="IFF", false_value="NULL"), 832 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 833 exp.JSONExtractScalar: lambda self, e: self.func( 834 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 835 ), 836 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 837 exp.JSONPathRoot: lambda *_: "", 838 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 839 exp.LogicalOr: rename_func("BOOLOR_AGG"), 840 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 841 exp.Max: max_or_greatest, 842 exp.Min: min_or_least, 843 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 844 exp.PercentileCont: transforms.preprocess( 845 [transforms.add_within_group_for_percentiles] 846 ), 847 exp.PercentileDisc: transforms.preprocess( 848 [transforms.add_within_group_for_percentiles] 849 ), 850 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 851 exp.RegexpILike: _regexpilike_sql, 852 exp.Rand: rename_func("RANDOM"), 853 exp.Select: transforms.preprocess( 854 [ 855 transforms.eliminate_distinct_on, 856 transforms.explode_to_unnest(), 857 transforms.eliminate_semi_and_anti_joins, 858 ] 859 ), 860 exp.SHA: rename_func("SHA1"), 861 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 862 exp.StartsWith: rename_func("STARTSWITH"), 863 exp.StrPosition: lambda self, e: self.func( 864 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 865 ), 866 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 867 exp.Stuff: rename_func("INSERT"), 868 exp.TimeAdd: date_delta_sql("TIMEADD"), 869 exp.TimestampDiff: lambda self, e: self.func( 870 "TIMESTAMPDIFF", e.unit, e.expression, e.this 871 ), 872 exp.TimestampTrunc: timestamptrunc_sql(), 873 exp.TimeStrToTime: timestrtotime_sql, 874 exp.TimeToStr: lambda self, e: self.func( 875 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 876 ), 877 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 878 exp.ToArray: rename_func("TO_ARRAY"), 879 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 880 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 881 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 882 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 883 exp.TsOrDsToDate: lambda self, e: self.func( 884 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 885 ), 886 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 887 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 888 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 889 exp.Xor: rename_func("BOOLXOR"), 890 } 891 892 SUPPORTED_JSON_PATH_PARTS = { 893 exp.JSONPathKey, 894 exp.JSONPathRoot, 895 exp.JSONPathSubscript, 896 } 897 898 TYPE_MAPPING = { 899 **generator.Generator.TYPE_MAPPING, 900 exp.DataType.Type.NESTED: "OBJECT", 901 exp.DataType.Type.STRUCT: "OBJECT", 902 } 903 904 PROPERTIES_LOCATION = { 905 **generator.Generator.PROPERTIES_LOCATION, 906 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 907 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 908 } 909 910 UNSUPPORTED_VALUES_EXPRESSIONS = { 911 exp.Map, 912 exp.StarMap, 913 exp.Struct, 914 exp.VarMap, 915 } 916 917 def with_properties(self, properties: exp.Properties) -> str: 918 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 919 920 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 921 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 922 values_as_table = False 923 924 return super().values_sql(expression, values_as_table=values_as_table) 925 926 def datatype_sql(self, expression: exp.DataType) -> str: 927 expressions = expression.expressions 928 if ( 929 expressions 930 and expression.is_type(*exp.DataType.STRUCT_TYPES) 931 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 932 ): 933 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 934 return "OBJECT" 935 936 return super().datatype_sql(expression) 937 938 def tonumber_sql(self, expression: exp.ToNumber) -> str: 939 return self.func( 940 "TO_NUMBER", 941 expression.this, 942 expression.args.get("format"), 943 expression.args.get("precision"), 944 expression.args.get("scale"), 945 ) 946 947 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 948 milli = expression.args.get("milli") 949 if milli is not None: 950 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 951 expression.set("nano", milli_to_nano) 952 953 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 954 955 def trycast_sql(self, expression: exp.TryCast) -> str: 956 value = expression.this 957 958 if value.type is None: 959 from sqlglot.optimizer.annotate_types import annotate_types 960 961 value = annotate_types(value) 962 963 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 964 return super().trycast_sql(expression) 965 966 # TRY_CAST only works for string values in Snowflake 967 return self.cast_sql(expression) 968 969 def log_sql(self, expression: exp.Log) -> str: 970 if not expression.expression: 971 return self.func("LN", expression.this) 972 973 return super().log_sql(expression) 974 975 def unnest_sql(self, expression: exp.Unnest) -> str: 976 unnest_alias = expression.args.get("alias") 977 offset = expression.args.get("offset") 978 979 columns = [ 980 exp.to_identifier("seq"), 981 exp.to_identifier("key"), 982 exp.to_identifier("path"), 983 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 984 seq_get(unnest_alias.columns if unnest_alias else [], 0) 985 or exp.to_identifier("value"), 986 exp.to_identifier("this"), 987 ] 988 989 if unnest_alias: 990 unnest_alias.set("columns", columns) 991 else: 992 unnest_alias = exp.TableAlias(this="_u", columns=columns) 993 994 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 995 alias = self.sql(unnest_alias) 996 alias = f" AS {alias}" if alias else "" 997 return f"{explode}{alias}" 998 999 def show_sql(self, expression: exp.Show) -> str: 1000 terse = "TERSE " if expression.args.get("terse") else "" 1001 history = " HISTORY" if expression.args.get("history") else "" 1002 like = self.sql(expression, "like") 1003 like = f" LIKE {like}" if like else "" 1004 1005 scope = self.sql(expression, "scope") 1006 scope = f" {scope}" if scope else "" 1007 1008 scope_kind = self.sql(expression, "scope_kind") 1009 if scope_kind: 1010 scope_kind = f" IN {scope_kind}" 1011 1012 starts_with = self.sql(expression, "starts_with") 1013 if starts_with: 1014 starts_with = f" STARTS WITH {starts_with}" 1015 1016 limit = self.sql(expression, "limit") 1017 1018 from_ = self.sql(expression, "from") 1019 if from_: 1020 from_ = f" FROM {from_}" 1021 1022 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1023 1024 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1025 # Other dialects don't support all of the following parameters, so we need to 1026 # generate default values as necessary to ensure the transpilation is correct 1027 group = expression.args.get("group") 1028 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1029 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1030 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1031 1032 return self.func( 1033 "REGEXP_SUBSTR", 1034 expression.this, 1035 expression.expression, 1036 position, 1037 occurrence, 1038 parameters, 1039 group, 1040 ) 1041 1042 def except_op(self, expression: exp.Except) -> str: 1043 if not expression.args.get("distinct"): 1044 self.unsupported("EXCEPT with All is not supported in Snowflake") 1045 return super().except_op(expression) 1046 1047 def intersect_op(self, expression: exp.Intersect) -> str: 1048 if not expression.args.get("distinct"): 1049 self.unsupported("INTERSECT with All is not supported in Snowflake") 1050 return super().intersect_op(expression) 1051 1052 def describe_sql(self, expression: exp.Describe) -> str: 1053 # Default to table if kind is unknown 1054 kind_value = expression.args.get("kind") or "TABLE" 1055 kind = f" {kind_value}" if kind_value else "" 1056 this = f" {self.sql(expression, 'this')}" 1057 expressions = self.expressions(expression, flat=True) 1058 expressions = f" {expressions}" if expressions else "" 1059 return f"DESCRIBE{kind}{this}{expressions}" 1060 1061 def generatedasidentitycolumnconstraint_sql( 1062 self, expression: exp.GeneratedAsIdentityColumnConstraint 1063 ) -> str: 1064 start = expression.args.get("start") 1065 start = f" START {start}" if start else "" 1066 increment = expression.args.get("increment") 1067 increment = f" INCREMENT {increment}" if increment else "" 1068 return f"AUTOINCREMENT{start}{increment}" 1069 1070 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1071 this = self.sql(expression, "this") 1072 return f"SWAP WITH {this}" 1073 1074 def cluster_sql(self, expression: exp.Cluster) -> str: 1075 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1076 1077 def struct_sql(self, expression: exp.Struct) -> str: 1078 keys = [] 1079 values = [] 1080 1081 for i, e in enumerate(expression.expressions): 1082 if isinstance(e, exp.PropertyEQ): 1083 keys.append( 1084 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1085 ) 1086 values.append(e.expression) 1087 else: 1088 keys.append(exp.Literal.string(f"_{i}")) 1089 values.append(e) 1090 1091 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1092 1093 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1094 if expression.args.get("weight") or expression.args.get("accuracy"): 1095 self.unsupported( 1096 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1097 ) 1098 1099 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1100 1101 def alterset_sql(self, expression: exp.AlterSet) -> str: 1102 exprs = self.expressions(expression, flat=True) 1103 exprs = f" {exprs}" if exprs else "" 1104 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1105 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1106 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1107 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1108 tag = self.expressions(expression, key="tag", flat=True) 1109 tag = f" TAG {tag}" if tag else "" 1110 1111 return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
926 def datatype_sql(self, expression: exp.DataType) -> str: 927 expressions = expression.expressions 928 if ( 929 expressions 930 and expression.is_type(*exp.DataType.STRUCT_TYPES) 931 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 932 ): 933 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 934 return "OBJECT" 935 936 return super().datatype_sql(expression)
947 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 948 milli = expression.args.get("milli") 949 if milli is not None: 950 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 951 expression.set("nano", milli_to_nano) 952 953 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
955 def trycast_sql(self, expression: exp.TryCast) -> str: 956 value = expression.this 957 958 if value.type is None: 959 from sqlglot.optimizer.annotate_types import annotate_types 960 961 value = annotate_types(value) 962 963 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 964 return super().trycast_sql(expression) 965 966 # TRY_CAST only works for string values in Snowflake 967 return self.cast_sql(expression)
975 def unnest_sql(self, expression: exp.Unnest) -> str: 976 unnest_alias = expression.args.get("alias") 977 offset = expression.args.get("offset") 978 979 columns = [ 980 exp.to_identifier("seq"), 981 exp.to_identifier("key"), 982 exp.to_identifier("path"), 983 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 984 seq_get(unnest_alias.columns if unnest_alias else [], 0) 985 or exp.to_identifier("value"), 986 exp.to_identifier("this"), 987 ] 988 989 if unnest_alias: 990 unnest_alias.set("columns", columns) 991 else: 992 unnest_alias = exp.TableAlias(this="_u", columns=columns) 993 994 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 995 alias = self.sql(unnest_alias) 996 alias = f" AS {alias}" if alias else "" 997 return f"{explode}{alias}"
999 def show_sql(self, expression: exp.Show) -> str: 1000 terse = "TERSE " if expression.args.get("terse") else "" 1001 history = " HISTORY" if expression.args.get("history") else "" 1002 like = self.sql(expression, "like") 1003 like = f" LIKE {like}" if like else "" 1004 1005 scope = self.sql(expression, "scope") 1006 scope = f" {scope}" if scope else "" 1007 1008 scope_kind = self.sql(expression, "scope_kind") 1009 if scope_kind: 1010 scope_kind = f" IN {scope_kind}" 1011 1012 starts_with = self.sql(expression, "starts_with") 1013 if starts_with: 1014 starts_with = f" STARTS WITH {starts_with}" 1015 1016 limit = self.sql(expression, "limit") 1017 1018 from_ = self.sql(expression, "from") 1019 if from_: 1020 from_ = f" FROM {from_}" 1021 1022 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
1024 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1025 # Other dialects don't support all of the following parameters, so we need to 1026 # generate default values as necessary to ensure the transpilation is correct 1027 group = expression.args.get("group") 1028 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1029 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1030 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1031 1032 return self.func( 1033 "REGEXP_SUBSTR", 1034 expression.this, 1035 expression.expression, 1036 position, 1037 occurrence, 1038 parameters, 1039 group, 1040 )
1052 def describe_sql(self, expression: exp.Describe) -> str: 1053 # Default to table if kind is unknown 1054 kind_value = expression.args.get("kind") or "TABLE" 1055 kind = f" {kind_value}" if kind_value else "" 1056 this = f" {self.sql(expression, 'this')}" 1057 expressions = self.expressions(expression, flat=True) 1058 expressions = f" {expressions}" if expressions else "" 1059 return f"DESCRIBE{kind}{this}{expressions}"
1061 def generatedasidentitycolumnconstraint_sql( 1062 self, expression: exp.GeneratedAsIdentityColumnConstraint 1063 ) -> str: 1064 start = expression.args.get("start") 1065 start = f" START {start}" if start else "" 1066 increment = expression.args.get("increment") 1067 increment = f" INCREMENT {increment}" if increment else "" 1068 return f"AUTOINCREMENT{start}{increment}"
1077 def struct_sql(self, expression: exp.Struct) -> str: 1078 keys = [] 1079 values = [] 1080 1081 for i, e in enumerate(expression.expressions): 1082 if isinstance(e, exp.PropertyEQ): 1083 keys.append( 1084 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1085 ) 1086 values.append(e.expression) 1087 else: 1088 keys.append(exp.Literal.string(f"_{i}")) 1089 values.append(e) 1090 1091 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
1093 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1094 if expression.args.get("weight") or expression.args.get("accuracy"): 1095 self.unsupported( 1096 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1097 ) 1098 1099 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
1101 def alterset_sql(self, expression: exp.AlterSet) -> str: 1102 exprs = self.expressions(expression, flat=True) 1103 exprs = f" {exprs}" if exprs else "" 1104 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1105 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1106 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1107 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1108 tag = self.expressions(expression, key="tag", flat=True) 1109 tag = f" TAG {tag}" if tag else "" 1110 1111 return f"SET{exprs}{file_format}{copy_options}{tag}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- OUTER_UNION_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql