sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    build_timestamp_from_parts,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": build_timestamp_from_parts,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on",
            "or", "order", "outer", "over", "partition", "preceding", "proto", "qualify",
            "range", "recursive", "respect", "right", "rollup", "rows", "select", "set",
            "some", "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
class BigQuery(Dialect):
WEEK_OFFSET = -1

First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
LOG_BASE_FIRST = False

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
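A hedged sketch of what this flag means in practice: BigQuery's LOG(x, b) takes the base second, while Postgres writes the base first, so sqlglot reorders the arguments when transpiling (the expected output is shown as a comment):

import sqlglot

# BigQuery: LOG(100, 10) is log base 10 of 100; Postgres writes LOG(base, value)
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="postgres")[0])
# Expected: SELECT LOG(10, 100)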
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS = False

Determines how function names are going to be normalized. Possible values:

- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
TIME_MAPPING = {"%D": "%m/%d/%y", "%E6S": "%S.%f"}

Associates this dialect's time formats with their equivalent Python strftime formats.
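For example, the BigQuery-specific %D element is rewritten through this mapping when targeting another dialect; a minimal sketch, with the expected output shown as a comment (it may vary by sqlglot version):

import sqlglot

sql = "SELECT PARSE_TIMESTAMP('%D', '12/31/24')"
# %D -> %m/%d/%y via TIME_MAPPING; DuckDB renders StrToTime as STRPTIME
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# Expected: SELECT STRPTIME('12/31/24', '%m/%d/%y')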
FORMAT_MAPPING = {"DD": "%d", "MM": "%m", "MON": "%b", "MONTH": "%B", "YYYY": "%Y", "YY": "%y", "HH": "%I", "HH12": "%I", "HH24": "%H", "MI": "%M", "SS": "%S", "SSSSS": "%f", "TZH": "%z"}

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
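A minimal sketch of the effect, assuming a hypothetical schema for a table t: when the optimizer qualifies a query and expands SELECT *, pseudo-columns listed here are left out of the projection list.

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

# Hypothetical schema: one real column plus the ingestion-time pseudo-column
schema = {"t": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}
expression = qualify(
    parse_one("SELECT * FROM t", read="bigquery"), schema=schema, dialect="bigquery"
)
print(expression.sql("bigquery"))  # the star expands to x only, not _PARTITIONTIME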
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
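To make the heuristic above concrete, here is a minimal sketch using sqlglot's expression helpers (the names FoO and dataset are made up):

    from sqlglot import exp
    from sqlglot.dialects.bigquery import BigQuery

    dialect = BigQuery()

    # A bare, unquoted identifier could be a CTE or column reference, which
    # BigQuery treats as case-insensitive, so the heuristic lowercases it.
    print(dialect.normalize_identifier(exp.to_identifier("FoO")).sql())  # foo

    # An identifier that is part of a qualified table name is assumed to be a
    # real table, which is case-sensitive by default, so it is left untouched.
    table = exp.to_table("dataset.FoO")
    print(dialect.normalize_identifier(table.this).sql())  # FoO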
Mapping of an escaped sequence (e.g. the two-character string \n) to its unescaped version (the actual newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_COLUMN_JOIN_MARKS
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- COPY_PARAMS_ARE_CSV
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
276 class Tokenizer(tokens.Tokenizer): 277 QUOTES = ["'", '"', '"""', "'''"] 278 COMMENTS = ["--", "#", ("/*", "*/")] 279 IDENTIFIERS = ["`"] 280 STRING_ESCAPES = ["\\"] 281 282 HEX_STRINGS = [("0x", ""), ("0X", "")] 283 284 BYTE_STRINGS = [ 285 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 286 ] 287 288 RAW_STRINGS = [ 289 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 290 ] 291 292 KEYWORDS = { 293 **tokens.Tokenizer.KEYWORDS, 294 "ANY TYPE": TokenType.VARIANT, 295 "BEGIN": TokenType.COMMAND, 296 "BEGIN TRANSACTION": TokenType.BEGIN, 297 "BYTES": TokenType.BINARY, 298 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 299 "DATETIME": TokenType.TIMESTAMP, 300 "DECLARE": TokenType.COMMAND, 301 "ELSEIF": TokenType.COMMAND, 302 "EXCEPTION": TokenType.COMMAND, 303 "FLOAT64": TokenType.DOUBLE, 304 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 305 "MODEL": TokenType.MODEL, 306 "NOT DETERMINISTIC": TokenType.VOLATILE, 307 "RECORD": TokenType.STRUCT, 308 "TIMESTAMP": TokenType.TIMESTAMPTZ, 309 } 310 KEYWORDS.pop("DIV") 311 KEYWORDS.pop("VALUES")
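A small sketch of what the KEYWORDS remapping above means in practice (types are printed in sqlglot's canonical names):

    import sqlglot

    # BigQuery's DATETIME is a wall-clock timestamp and its TIMESTAMP is
    # timezone-aware, so they map to TIMESTAMP and TIMESTAMPTZ respectively.
    print(sqlglot.parse_one("CAST(x AS DATETIME)", read="bigquery").to.sql())   # TIMESTAMP
    print(sqlglot.parse_one("CAST(x AS TIMESTAMP)", read="bigquery").to.sql())  # TIMESTAMPTZ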
313 class Parser(parser.Parser): 314 PREFIXED_PIVOT_COLUMNS = True 315 LOG_DEFAULTS_TO_LN = True 316 SUPPORTS_IMPLICIT_UNNEST = True 317 318 FUNCTIONS = { 319 **parser.Parser.FUNCTIONS, 320 "DATE": _build_date, 321 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 322 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 323 "DATE_TRUNC": lambda args: exp.DateTrunc( 324 unit=exp.Literal.string(str(seq_get(args, 1))), 325 this=seq_get(args, 0), 326 ), 327 "DATETIME": build_timestamp_from_parts, 328 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 329 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 330 "DIV": binary_from_function(exp.IntDiv), 331 "FORMAT_DATE": lambda args: exp.TimeToStr( 332 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 333 ), 334 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 335 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 336 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 337 ), 338 "MD5": exp.MD5Digest.from_arg_list, 339 "TO_HEX": _build_to_hex, 340 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 341 [seq_get(args, 1), seq_get(args, 0)] 342 ), 343 "PARSE_TIMESTAMP": _build_parse_timestamp, 344 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 345 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 346 this=seq_get(args, 0), 347 expression=seq_get(args, 1), 348 position=seq_get(args, 2), 349 occurrence=seq_get(args, 3), 350 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 351 ), 352 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 353 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 354 "SPLIT": lambda args: exp.Split( 355 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 356 this=seq_get(args, 0), 357 expression=seq_get(args, 1) or exp.Literal.string(","), 358 ), 359 "TIME": _build_time, 360 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 361 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 362 "TIMESTAMP": _build_timestamp, 363 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 364 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 365 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 366 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 367 ), 368 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 369 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 370 ), 371 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 372 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 373 } 374 375 FUNCTION_PARSERS = { 376 **parser.Parser.FUNCTION_PARSERS, 377 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 378 } 379 FUNCTION_PARSERS.pop("TRIM") 380 381 NO_PAREN_FUNCTIONS = { 382 **parser.Parser.NO_PAREN_FUNCTIONS, 383 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 384 } 385 386 NESTED_TYPE_TOKENS = { 387 *parser.Parser.NESTED_TYPE_TOKENS, 388 TokenType.TABLE, 389 } 390 391 PROPERTY_PARSERS = { 392 **parser.Parser.PROPERTY_PARSERS, 393 "NOT DETERMINISTIC": lambda self: self.expression( 394 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 395 ), 396 "OPTIONS": lambda self: self._parse_with_property(), 397 } 398 399 CONSTRAINT_PARSERS = { 400 **parser.Parser.CONSTRAINT_PARSERS, 401 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 402 } 403 404 RANGE_PARSERS = 
parser.Parser.RANGE_PARSERS.copy() 405 RANGE_PARSERS.pop(TokenType.OVERLAPS) 406 407 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 408 409 STATEMENT_PARSERS = { 410 **parser.Parser.STATEMENT_PARSERS, 411 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 412 TokenType.END: lambda self: self._parse_as_command(self._prev), 413 TokenType.FOR: lambda self: self._parse_for_in(), 414 } 415 416 BRACKET_OFFSETS = { 417 "OFFSET": (0, False), 418 "ORDINAL": (1, False), 419 "SAFE_OFFSET": (0, True), 420 "SAFE_ORDINAL": (1, True), 421 } 422 423 def _parse_for_in(self) -> exp.ForIn: 424 this = self._parse_range() 425 self._match_text_seq("DO") 426 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 427 428 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 429 this = super()._parse_table_part(schema=schema) or self._parse_number() 430 431 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 432 if isinstance(this, exp.Identifier): 433 table_name = this.name 434 while self._match(TokenType.DASH, advance=False) and self._next: 435 text = "" 436 while self._curr and self._curr.token_type != TokenType.DOT: 437 self._advance() 438 text += self._prev.text 439 table_name += text 440 441 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 442 elif isinstance(this, exp.Literal): 443 table_name = this.name 444 445 if self._is_connected() and self._parse_var(any_token=True): 446 table_name += self._prev.text 447 448 this = exp.Identifier(this=table_name, quoted=True) 449 450 return this 451 452 def _parse_table_parts( 453 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 454 ) -> exp.Table: 455 table = super()._parse_table_parts( 456 schema=schema, is_db_reference=is_db_reference, wildcard=True 457 ) 458 459 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 460 if not table.catalog: 461 if table.db: 462 parts = table.db.split(".") 463 if len(parts) == 2 and not table.args["db"].quoted: 464 table.set("catalog", exp.Identifier(this=parts[0])) 465 table.set("db", exp.Identifier(this=parts[1])) 466 else: 467 parts = table.name.split(".") 468 if len(parts) == 2 and not table.this.quoted: 469 table.set("db", exp.Identifier(this=parts[0])) 470 table.set("this", exp.Identifier(this=parts[1])) 471 472 if any("." in p.name for p in table.parts): 473 catalog, db, this, *rest = ( 474 exp.to_identifier(p, quoted=True) 475 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 476 ) 477 478 if rest and this: 479 this = exp.Dot.build([this, *rest]) # type: ignore 480 481 table = exp.Table( 482 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 483 ) 484 table.meta["quoted_table"] = True 485 486 return table 487 488 def _parse_column(self) -> t.Optional[exp.Expression]: 489 column = super()._parse_column() 490 if isinstance(column, exp.Column): 491 parts = column.parts 492 if any("." in p.name for p in parts): 493 catalog, db, table, this, *rest = ( 494 exp.to_identifier(p, quoted=True) 495 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 496 ) 497 498 if rest and this: 499 this = exp.Dot.build([this, *rest]) # type: ignore 500 501 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 502 column.meta["quoted_column"] = True 503 504 return column 505 506 @t.overload 507 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 
508 509 @t.overload 510 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 511 512 def _parse_json_object(self, agg=False): 513 json_object = super()._parse_json_object() 514 array_kv_pair = seq_get(json_object.expressions, 0) 515 516 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 517 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 518 if ( 519 array_kv_pair 520 and isinstance(array_kv_pair.this, exp.Array) 521 and isinstance(array_kv_pair.expression, exp.Array) 522 ): 523 keys = array_kv_pair.this.expressions 524 values = array_kv_pair.expression.expressions 525 526 json_object.set( 527 "expressions", 528 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 529 ) 530 531 return json_object 532 533 def _parse_bracket( 534 self, this: t.Optional[exp.Expression] = None 535 ) -> t.Optional[exp.Expression]: 536 bracket = super()._parse_bracket(this) 537 538 if this is bracket: 539 return bracket 540 541 if isinstance(bracket, exp.Bracket): 542 for expression in bracket.expressions: 543 name = expression.name.upper() 544 545 if name not in self.BRACKET_OFFSETS: 546 break 547 548 offset, safe = self.BRACKET_OFFSETS[name] 549 bracket.set("offset", offset) 550 bracket.set("safe", safe) 551 expression.replace(expression.expressions[0]) 552 553 return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
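For example, a sketch of the BRACKET_OFFSETS handling defined above (arr and t are hypothetical names):

    import sqlglot
    from sqlglot import exp

    # SAFE_OFFSET(0) is folded into the Bracket node's `offset` and `safe`
    # args instead of being kept as a function call.
    select = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery")
    bracket = select.find(exp.Bracket)
    print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True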
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
555 class Generator(generator.Generator): 556 EXPLICIT_UNION = True 557 INTERVAL_ALLOWS_PLURAL_FORM = False 558 JOIN_HINTS = False 559 QUERY_HINTS = False 560 TABLE_HINTS = False 561 LIMIT_FETCH = "LIMIT" 562 RENAME_TABLE_WITH_DB = False 563 NVL2_SUPPORTED = False 564 UNNEST_WITH_ORDINALITY = False 565 COLLATE_IS_FUNC = True 566 LIMIT_ONLY_LITERALS = True 567 SUPPORTS_TABLE_ALIAS_COLUMNS = False 568 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 569 JSON_KEY_VALUE_PAIR_SEP = "," 570 NULL_ORDERING_SUPPORTED = False 571 IGNORE_NULLS_IN_FUNC = True 572 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 573 CAN_IMPLEMENT_ARRAY_ANY = True 574 SUPPORTS_TO_NUMBER = False 575 NAMED_PLACEHOLDER_TOKEN = "@" 576 HEX_FUNC = "TO_HEX" 577 WITH_PROPERTIES_PREFIX = "OPTIONS" 578 579 TRANSFORMS = { 580 **generator.Generator.TRANSFORMS, 581 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 582 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 583 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 584 exp.Array: inline_array_unless_query, 585 exp.ArrayContains: _array_contains_sql, 586 exp.ArrayFilter: filter_array_using_unnest, 587 exp.ArraySize: rename_func("ARRAY_LENGTH"), 588 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 589 exp.CollateProperty: lambda self, e: ( 590 f"DEFAULT COLLATE {self.sql(e, 'this')}" 591 if e.args.get("default") 592 else f"COLLATE {self.sql(e, 'this')}" 593 ), 594 exp.Commit: lambda *_: "COMMIT TRANSACTION", 595 exp.CountIf: rename_func("COUNTIF"), 596 exp.Create: _create_sql, 597 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 598 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 599 exp.DateDiff: lambda self, e: self.func( 600 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 601 ), 602 exp.DateFromParts: rename_func("DATE"), 603 exp.DateStrToDate: datestrtodate_sql, 604 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 605 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 606 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 607 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 608 exp.FromTimeZone: lambda self, e: self.func( 609 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 610 ), 611 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 612 exp.GroupConcat: rename_func("STRING_AGG"), 613 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 614 exp.If: if_sql(false_value="NULL"), 615 exp.ILike: no_ilike_sql, 616 exp.IntDiv: rename_func("DIV"), 617 exp.JSONFormat: rename_func("TO_JSON_STRING"), 618 exp.Max: max_or_greatest, 619 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 620 exp.MD5Digest: rename_func("MD5"), 621 exp.Min: min_or_least, 622 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 623 exp.RegexpExtract: lambda self, e: self.func( 624 "REGEXP_EXTRACT", 625 e.this, 626 e.expression, 627 e.args.get("position"), 628 e.args.get("occurrence"), 629 ), 630 exp.RegexpReplace: regexp_replace_sql, 631 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 632 exp.ReturnsProperty: _returnsproperty_sql, 633 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 634 exp.Select: transforms.preprocess( 635 [ 636 transforms.explode_to_unnest(), 637 transforms.unqualify_unnest, 638 transforms.eliminate_distinct_on, 639 _alias_ordered_group, 640 transforms.eliminate_semi_and_anti_joins, 641 ] 642 ), 643 exp.SHA: rename_func("SHA1"), 644 exp.SHA2: sha256_sql, 645 exp.StabilityProperty: lambda self, e: ( 646 
"DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 647 ), 648 exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this), 649 exp.StrToTime: lambda self, e: self.func( 650 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 651 ), 652 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 653 exp.TimeFromParts: rename_func("TIME"), 654 exp.TimestampFromParts: rename_func("DATETIME"), 655 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 656 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 657 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 658 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 659 exp.TimeStrToTime: timestrtotime_sql, 660 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 661 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 662 exp.TsOrDsAdd: _ts_or_ds_add_sql, 663 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 664 exp.TsOrDsToTime: rename_func("TIME"), 665 exp.Unhex: rename_func("FROM_HEX"), 666 exp.UnixDate: rename_func("UNIX_DATE"), 667 exp.UnixToTime: _unix_to_time_sql, 668 exp.Values: _derived_table_values_to_unnest, 669 exp.VariancePop: rename_func("VAR_POP"), 670 } 671 672 SUPPORTED_JSON_PATH_PARTS = { 673 exp.JSONPathKey, 674 exp.JSONPathRoot, 675 exp.JSONPathSubscript, 676 } 677 678 TYPE_MAPPING = { 679 **generator.Generator.TYPE_MAPPING, 680 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 681 exp.DataType.Type.BIGINT: "INT64", 682 exp.DataType.Type.BINARY: "BYTES", 683 exp.DataType.Type.BOOLEAN: "BOOL", 684 exp.DataType.Type.CHAR: "STRING", 685 exp.DataType.Type.DECIMAL: "NUMERIC", 686 exp.DataType.Type.DOUBLE: "FLOAT64", 687 exp.DataType.Type.FLOAT: "FLOAT64", 688 exp.DataType.Type.INT: "INT64", 689 exp.DataType.Type.NCHAR: "STRING", 690 exp.DataType.Type.NVARCHAR: "STRING", 691 exp.DataType.Type.SMALLINT: "INT64", 692 exp.DataType.Type.TEXT: "STRING", 693 exp.DataType.Type.TIMESTAMP: "DATETIME", 694 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 695 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 696 exp.DataType.Type.TINYINT: "INT64", 697 exp.DataType.Type.VARBINARY: "BYTES", 698 exp.DataType.Type.ROWVERSION: "BYTES", 699 exp.DataType.Type.VARCHAR: "STRING", 700 exp.DataType.Type.VARIANT: "ANY TYPE", 701 } 702 703 PROPERTIES_LOCATION = { 704 **generator.Generator.PROPERTIES_LOCATION, 705 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 706 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 707 } 708 709 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 710 RESERVED_KEYWORDS = { 711 "all", 712 "and", 713 "any", 714 "array", 715 "as", 716 "asc", 717 "assert_rows_modified", 718 "at", 719 "between", 720 "by", 721 "case", 722 "cast", 723 "collate", 724 "contains", 725 "create", 726 "cross", 727 "cube", 728 "current", 729 "default", 730 "define", 731 "desc", 732 "distinct", 733 "else", 734 "end", 735 "enum", 736 "escape", 737 "except", 738 "exclude", 739 "exists", 740 "extract", 741 "false", 742 "fetch", 743 "following", 744 "for", 745 "from", 746 "full", 747 "group", 748 "grouping", 749 "groups", 750 "hash", 751 "having", 752 "if", 753 "ignore", 754 "in", 755 "inner", 756 "intersect", 757 "interval", 758 "into", 759 "is", 760 "join", 761 "lateral", 762 "left", 763 "like", 764 "limit", 765 "lookup", 766 "merge", 767 "natural", 768 "new", 769 "no", 770 "not", 771 "null", 772 "nulls", 773 "of", 774 "on", 775 "or", 776 "order", 777 "outer", 778 "over", 779 "partition", 780 "preceding", 781 "proto", 782 "qualify", 783 "range", 784 
"recursive", 785 "respect", 786 "right", 787 "rollup", 788 "rows", 789 "select", 790 "set", 791 "some", 792 "struct", 793 "tablesample", 794 "then", 795 "to", 796 "treat", 797 "true", 798 "unbounded", 799 "union", 800 "unnest", 801 "using", 802 "when", 803 "where", 804 "window", 805 "with", 806 "within", 807 } 808 809 def mod_sql(self, expression: exp.Mod) -> str: 810 this = expression.this 811 expr = expression.expression 812 return self.func( 813 "MOD", 814 this.unnest() if isinstance(this, exp.Paren) else this, 815 expr.unnest() if isinstance(expr, exp.Paren) else expr, 816 ) 817 818 def column_parts(self, expression: exp.Column) -> str: 819 if expression.meta.get("quoted_column"): 820 # If a column reference is of the form `dataset.table`.name, we need 821 # to preserve the quoted table path, otherwise the reference breaks 822 table_parts = ".".join(p.name for p in expression.parts[:-1]) 823 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 824 return f"{table_path}.{self.sql(expression, 'this')}" 825 826 return super().column_parts(expression) 827 828 def table_parts(self, expression: exp.Table) -> str: 829 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 830 # we need to make sure the correct quoting is used in each case. 831 # 832 # For example, if there is a CTE x that clashes with a schema name, then the former will 833 # return the table y in that schema, whereas the latter will return the CTE's y column: 834 # 835 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 836 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 837 if expression.meta.get("quoted_table"): 838 table_parts = ".".join(p.name for p in expression.parts) 839 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 840 841 return super().table_parts(expression) 842 843 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 844 this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression 845 return self.func("FORMAT_DATE", self.format_time(expression), this.this) 846 847 def eq_sql(self, expression: exp.EQ) -> str: 848 # Operands of = cannot be NULL in BigQuery 849 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 850 if not isinstance(expression.parent, exp.Update): 851 return "NULL" 852 853 return self.binary(expression, "=") 854 855 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 856 parent = expression.parent 857 858 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 859 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
860 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 861 return self.func( 862 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 863 ) 864 865 return super().attimezone_sql(expression) 866 867 def trycast_sql(self, expression: exp.TryCast) -> str: 868 return self.cast_sql(expression, safe_prefix="SAFE_") 869 870 def bracket_sql(self, expression: exp.Bracket) -> str: 871 this = expression.this 872 expressions = expression.expressions 873 874 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 875 arg = expressions[0] 876 if arg.type is None: 877 from sqlglot.optimizer.annotate_types import annotate_types 878 879 arg = annotate_types(arg) 880 881 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 882 # BQ doesn't support bracket syntax with string values for structs 883 return f"{self.sql(this)}.{arg.name}" 884 885 expressions_sql = self.expressions(expression, flat=True) 886 offset = expression.args.get("offset") 887 888 if offset == 0: 889 expressions_sql = f"OFFSET({expressions_sql})" 890 elif offset == 1: 891 expressions_sql = f"ORDINAL({expressions_sql})" 892 elif offset is not None: 893 self.unsupported(f"Unsupported array offset: {offset}") 894 895 if expression.args.get("safe"): 896 expressions_sql = f"SAFE_{expressions_sql}" 897 898 return f"{self.sql(this)}[{expressions_sql}]" 899 900 def in_unnest_op(self, expression: exp.Unnest) -> str: 901 return self.sql(expression) 902 903 def except_op(self, expression: exp.Except) -> str: 904 if not expression.args.get("distinct"): 905 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 906 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 907 908 def intersect_op(self, expression: exp.Intersect) -> str: 909 if not expression.args.get("distinct"): 910 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 911 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 912 913 def version_sql(self, expression: exp.Version) -> str: 914 if expression.name == "TIMESTAMP": 915 expression.set("this", "SYSTEM_TIME") 916 return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
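A generation-side sketch tying a couple of these settings together, namely the SAFE_ prefix used by trycast_sql and the TYPE_MAPPING above (x is a hypothetical column):

    import sqlglot

    # TRY_CAST is emitted with the SAFE_ prefix, and BIGINT is remapped to INT64.
    print(sqlglot.transpile("SELECT TRY_CAST(x AS BIGINT)", write="bigquery")[0])
    # SELECT SAFE_CAST(x AS INT64)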
818 def column_parts(self, expression: exp.Column) -> str: 819 if expression.meta.get("quoted_column"): 820 # If a column reference is of the form `dataset.table`.name, we need 821 # to preserve the quoted table path, otherwise the reference breaks 822 table_parts = ".".join(p.name for p in expression.parts[:-1]) 823 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 824 return f"{table_path}.{self.sql(expression, 'this')}" 825 826 return super().column_parts(expression)
828 def table_parts(self, expression: exp.Table) -> str: 829 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 830 # we need to make sure the correct quoting is used in each case. 831 # 832 # For example, if there is a CTE x that clashes with a schema name, then the former will 833 # return the table y in that schema, whereas the latter will return the CTE's y column: 834 # 835 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 836 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 837 if expression.meta.get("quoted_table"): 838 table_parts = ".".join(p.name for p in expression.parts) 839 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 840 841 return super().table_parts(expression)
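A round-trip sketch of the distinction described in the comment (x and y are hypothetical names):

    import sqlglot

    # Both quoting styles survive a round trip, since `x.y` and `x`.`y` can
    # refer to different data sources in BigQuery.
    for sql in ("SELECT * FROM `x.y`", "SELECT * FROM `x`.`y`"):
        print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])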
855 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 856 parent = expression.parent 857 858 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 859 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 860 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 861 return self.func( 862 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 863 ) 864 865 return super().attimezone_sql(expression)
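For instance (a sketch; ts is a hypothetical timestamp column):

    import sqlglot

    # Outside the CAST ... FORMAT construct, AT TIME ZONE is rewritten into
    # the TIMESTAMP(DATETIME(...)) form shown above.
    print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])
    # SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))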
870 def bracket_sql(self, expression: exp.Bracket) -> str: 871 this = expression.this 872 expressions = expression.expressions 873 874 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 875 arg = expressions[0] 876 if arg.type is None: 877 from sqlglot.optimizer.annotate_types import annotate_types 878 879 arg = annotate_types(arg) 880 881 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 882 # BQ doesn't support bracket syntax with string values for structs 883 return f"{self.sql(this)}.{arg.name}" 884 885 expressions_sql = self.expressions(expression, flat=True) 886 offset = expression.args.get("offset") 887 888 if offset == 0: 889 expressions_sql = f"OFFSET({expressions_sql})" 890 elif offset == 1: 891 expressions_sql = f"ORDINAL({expressions_sql})" 892 elif offset is not None: 893 self.unsupported(f"Unsupported array offset: {offset}") 894 895 if expression.args.get("safe"): 896 expressions_sql = f"SAFE_{expressions_sql}" 897 898 return f"{self.sql(this)}[{expressions_sql}]"
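And a round-trip sketch combining this with the parser-side folding of bracket wrappers (arr and t are hypothetical):

    import sqlglot

    # The parser folds SAFE_ORDINAL into offset/safe args, and bracket_sql
    # re-emits the matching wrapper, so the query round-trips unchanged.
    print(sqlglot.transpile("SELECT arr[SAFE_ORDINAL(1)] FROM t", read="bigquery", write="bigquery")[0])
    # SELECT arr[SAFE_ORDINAL(1)] FROM t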
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- OUTER_UNION_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql