sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms

from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_delta_to_binary_interval_op,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    remove_from_array_using_filter,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # See https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder
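

# Hedged usage sketch (outputs are illustrative and may vary across sqlglot
# versions). Because the builder above inserts a 0 start when the series start
# is omitted, a one-argument GENERATE_SERIES is normalized at parse time:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT GENERATE_SERIES(5)", read="duckdb", write="duckdb")[0]  # doctest: +SKIP
#   'SELECT GENERATE_SERIES(0, 5)'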


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            if is_property_eq:
                if isinstance(expr.this, exp.Identifier):
                    key = self.sql(exp.Literal.string(expr.name))
                else:
                    key = self.sql(expr.this)
            else:
                key = self.sql(exp.Literal.string(f"_{i}"))

            args.append(f"{key}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql
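

# Hedged illustration of the wrapping above: when the arrow-extraction result
# feeds a binary, bracket, or IN parent, it is parenthesized so operator
# precedence survives, e.g. roughly JSON_EXTRACT(j, '$.x') = 1 from another
# dialect renders here as (j -> '$.x') = 1.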


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
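

# Hedged illustration of the helpers above: a BigQuery GENERATE_DATE_ARRAY call
# becomes (roughly) a GENERATE_SERIES over DATE-casted bounds, cast back to
# DATE[]; the exact output string may differ across sqlglot versions:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile(
#   ...     "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05', INTERVAL 1 DAY)",
#   ...     read="bigquery",
#   ...     write="duckdb",
#   ... )[0]  # doctest: +SKIP
#   "SELECT CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE), INTERVAL '1' DAY) AS DATE[])"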


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }
    DATE_PART_MAPPING.pop("WEEKDAY")

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "RESET": TokenType.COMMAND,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
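
    # Hedged example of the keyword remapping above: DuckDB's EXCLUDE tokenizes
    # as EXCEPT, so star-exclusions can transpile to dialects that spell it
    # EXCEPT (output is illustrative):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT * EXCLUDE (a) FROM t", read="duckdb", write="bigquery")[0]  # doctest: +SKIP
    #   'SELECT * EXCEPT (a) FROM t'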

    class Parser(parser.Parser):
        MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True

        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"),
        }
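
        # Hedged example of the FUNCTIONS table above: DuckDB spellings are
        # normalized into canonical expressions at parse time, so they can be
        # re-rendered in other dialects (output is illustrative):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="hive")[0]  # doctest: +SKIP
        #   'SELECT FROM_UNIXTIME(1618088028295 / POW(10, 3))'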

        def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
            index = self._index
            if not self._match_text_seq("LAMBDA"):
                return super()._parse_lambda(alias=alias)

            expressions = self._parse_csv(self._parse_lambda_arg)
            if not self._match(TokenType.COLON):
                self._retreat(index)
                return None

            this = self._replace_lambda(self._parse_assignment(), expressions)
            return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")
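
        # Hedged note on the prefix-alias parsing above: `foo: expr` and
        # `FROM foo: bar` parse into ordinary aliases, so e.g. (illustrative)
        # "SELECT foo: 1" can re-render as "SELECT 1 AS foo" in other dialects.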

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

        def _parse_primary(self) -> t.Optional[exp.Expression]:
            if self._match_pair(TokenType.HASH, TokenType.NUMBER):
                return exp.PositionalColumn(this=exp.Literal.number(self._prev.text))

            return super()._parse_primary()
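
    # Hedged note on the statement parsers above: ATTACH/DETACH produce
    # dedicated AST nodes rather than opaque commands, e.g. (illustrative):
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("ATTACH 'file.db' AS fdb", read="duckdb")).__name__  # doctest: +SKIP
    #   'Attach'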

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
        NORMALIZE_EXTRACT_DATE_PARTS = True
        SUPPORTS_LIKE_QUANTIFIERS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: date_delta_to_binary_interval_op(),
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: date_delta_to_binary_interval_op(),
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: date_delta_to_binary_interval_op(),
            exp.DatetimeAdd: date_delta_to_binary_interval_op(),
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: date_delta_to_binary_interval_op(),
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: date_delta_to_binary_interval_op(),
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }
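
        # Hedged note on the TRANSFORMS table above: expressions parsed from
        # other dialects are rendered with DuckDB spellings, e.g. (illustrative)
        # exp.LogicalOr renders as BOOL_OR(...) and exp.Explode as UNNEST(...).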
"DATE_DIFF", 746 f"'{e.args.get('unit') or 'DAY'}'", 747 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 748 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 749 ), 750 exp.UnixToStr: lambda self, e: self.func( 751 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 752 ), 753 exp.DatetimeTrunc: lambda self, e: self.func( 754 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 755 ), 756 exp.UnixToTime: _unix_to_time_sql, 757 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 758 exp.VariancePop: rename_func("VAR_POP"), 759 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 760 exp.Xor: bool_xor_sql, 761 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 762 rename_func("LEVENSHTEIN") 763 ), 764 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 765 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 766 exp.DateBin: rename_func("TIME_BUCKET"), 767 } 768 769 SUPPORTED_JSON_PATH_PARTS = { 770 exp.JSONPathKey, 771 exp.JSONPathRoot, 772 exp.JSONPathSubscript, 773 exp.JSONPathWildcard, 774 } 775 776 TYPE_MAPPING = { 777 **generator.Generator.TYPE_MAPPING, 778 exp.DataType.Type.BINARY: "BLOB", 779 exp.DataType.Type.BPCHAR: "TEXT", 780 exp.DataType.Type.CHAR: "TEXT", 781 exp.DataType.Type.DATETIME: "TIMESTAMP", 782 exp.DataType.Type.FLOAT: "REAL", 783 exp.DataType.Type.JSONB: "JSON", 784 exp.DataType.Type.NCHAR: "TEXT", 785 exp.DataType.Type.NVARCHAR: "TEXT", 786 exp.DataType.Type.UINT: "UINTEGER", 787 exp.DataType.Type.VARBINARY: "BLOB", 788 exp.DataType.Type.ROWVERSION: "BLOB", 789 exp.DataType.Type.VARCHAR: "TEXT", 790 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 791 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 792 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 793 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 794 } 795 796 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 797 RESERVED_KEYWORDS = { 798 "array", 799 "analyse", 800 "union", 801 "all", 802 "when", 803 "in_p", 804 "default", 805 "create_p", 806 "window", 807 "asymmetric", 808 "to", 809 "else", 810 "localtime", 811 "from", 812 "end_p", 813 "select", 814 "current_date", 815 "foreign", 816 "with", 817 "grant", 818 "session_user", 819 "or", 820 "except", 821 "references", 822 "fetch", 823 "limit", 824 "group_p", 825 "leading", 826 "into", 827 "collate", 828 "offset", 829 "do", 830 "then", 831 "localtimestamp", 832 "check_p", 833 "lateral_p", 834 "current_role", 835 "where", 836 "asc_p", 837 "placing", 838 "desc_p", 839 "user", 840 "unique", 841 "initially", 842 "column", 843 "both", 844 "some", 845 "as", 846 "any", 847 "only", 848 "deferrable", 849 "null_p", 850 "current_time", 851 "true_p", 852 "table", 853 "case", 854 "trailing", 855 "variadic", 856 "for", 857 "on", 858 "distinct", 859 "false_p", 860 "not", 861 "constraint", 862 "current_timestamp", 863 "returning", 864 "primary", 865 "intersect", 866 "having", 867 "analyze", 868 "current_user", 869 "and", 870 "cast", 871 "symmetric", 872 "using", 873 "order", 874 "current_catalog", 875 } 876 877 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 878 879 # DuckDB doesn't generally support CREATE TABLE .. properties 880 # https://duckdb.org/docs/sql/statements/create_table.html 881 PROPERTIES_LOCATION = { 882 prop: exp.Properties.Location.UNSUPPORTED 883 for prop in generator.Generator.PROPERTIES_LOCATION 884 } 885 886 # There are a few exceptions (e.g. 

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
        PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def lambda_sql(
            self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
        ) -> str:
            if expression.args.get("colon"):
                prefix = "LAMBDA "
                arrow_sep = ":"
                wrap = False
            else:
                prefix = ""

            lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
            return f"{prefix}{lambda_sql}"

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)
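
        # Hedged note on the two *fromparts methods above: MAKE_TIME and
        # MAKE_TIMESTAMP take fractional seconds, so milli/nano components are
        # folded into `sec`, e.g. sec=20, milli=500 becomes 20 + 500 / 1000.0.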

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"
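
        # Hedged example of the WITHIN GROUP rewrite above: an ordered-set
        # aggregate like PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) is
        # reshaped so the order key comes first, i.e. roughly QUANTILE_CONT(x, 0.5).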
exp.Anonymous(this="LENGTH", expressions=[varchar]) 1077 ) # anonymous to break length_sql recursion 1078 ) 1079 1080 return self.sql(case) 1081 1082 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1083 this = expression.this 1084 key = expression.args.get("key") 1085 key_sql = key.name if isinstance(key, exp.Expression) else "" 1086 value_sql = self.sql(expression, "value") 1087 1088 kv_sql = f"{key_sql} := {value_sql}" 1089 1090 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1091 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1092 if isinstance(this, exp.Struct) and not this.expressions: 1093 return self.func("STRUCT_PACK", kv_sql) 1094 1095 return self.func("STRUCT_INSERT", this, kv_sql) 1096 1097 def unnest_sql(self, expression: exp.Unnest) -> str: 1098 explode_array = expression.args.get("explode_array") 1099 if explode_array: 1100 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1101 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1102 expression.expressions.append( 1103 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1104 ) 1105 1106 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1107 alias = expression.args.get("alias") 1108 if isinstance(alias, exp.TableAlias): 1109 expression.set("alias", None) 1110 if alias.columns: 1111 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1112 1113 unnest_sql = super().unnest_sql(expression) 1114 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1115 return self.sql(select) 1116 1117 return super().unnest_sql(expression) 1118 1119 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1120 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1121 # DuckDB should render IGNORE NULLS only for the general-purpose 1122 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1123 return super().ignorenulls_sql(expression) 1124 1125 if not isinstance(expression.this, exp.AnyValue): 1126 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1127 1128 return self.sql(expression, "this") 1129 1130 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1131 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1132 # DuckDB should render RESPECT NULLS only for the general-purpose 1133 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql
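
        # Hedged sketch of the POSEXPLODE translation above (output is
        # illustrative and may vary across sqlglot versions):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT * FROM POSEXPLODE(a)", read="spark", write="duckdb")[0]  # doctest: +SKIP
        #   'SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(a, 1) - 1 AS pos, UNNEST(a) AS col)'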

        def addmonths_sql(self, expression: exp.AddMonths) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

            func = self.func(
                "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH"))
            )

            # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
            # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
            # we need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
            # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
            if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
                return self.sql(exp.Cast(this=func, to=this.type))

            return self.sql(func)
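

# Hedged end-to-end sketch: DuckDB-flavored SQL can be parsed and re-rendered
# through this dialect via the top-level API (outputs are illustrative and may
# vary slightly across sqlglot versions):
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT STRFTIME(x, '%y-%-m-%S')", read="duckdb", write="hive")[0]  # doctest: +SKIP
#   "SELECT DATE_FORMAT(x, 'yy-M-ss')"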
exp.JSONPathSubscript, 774 exp.JSONPathWildcard, 775 } 776 777 TYPE_MAPPING = { 778 **generator.Generator.TYPE_MAPPING, 779 exp.DataType.Type.BINARY: "BLOB", 780 exp.DataType.Type.BPCHAR: "TEXT", 781 exp.DataType.Type.CHAR: "TEXT", 782 exp.DataType.Type.DATETIME: "TIMESTAMP", 783 exp.DataType.Type.FLOAT: "REAL", 784 exp.DataType.Type.JSONB: "JSON", 785 exp.DataType.Type.NCHAR: "TEXT", 786 exp.DataType.Type.NVARCHAR: "TEXT", 787 exp.DataType.Type.UINT: "UINTEGER", 788 exp.DataType.Type.VARBINARY: "BLOB", 789 exp.DataType.Type.ROWVERSION: "BLOB", 790 exp.DataType.Type.VARCHAR: "TEXT", 791 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 792 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 793 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 794 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 795 } 796 797 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 798 RESERVED_KEYWORDS = { 799 "array", 800 "analyse", 801 "union", 802 "all", 803 "when", 804 "in_p", 805 "default", 806 "create_p", 807 "window", 808 "asymmetric", 809 "to", 810 "else", 811 "localtime", 812 "from", 813 "end_p", 814 "select", 815 "current_date", 816 "foreign", 817 "with", 818 "grant", 819 "session_user", 820 "or", 821 "except", 822 "references", 823 "fetch", 824 "limit", 825 "group_p", 826 "leading", 827 "into", 828 "collate", 829 "offset", 830 "do", 831 "then", 832 "localtimestamp", 833 "check_p", 834 "lateral_p", 835 "current_role", 836 "where", 837 "asc_p", 838 "placing", 839 "desc_p", 840 "user", 841 "unique", 842 "initially", 843 "column", 844 "both", 845 "some", 846 "as", 847 "any", 848 "only", 849 "deferrable", 850 "null_p", 851 "current_time", 852 "true_p", 853 "table", 854 "case", 855 "trailing", 856 "variadic", 857 "for", 858 "on", 859 "distinct", 860 "false_p", 861 "not", 862 "constraint", 863 "current_timestamp", 864 "returning", 865 "primary", 866 "intersect", 867 "having", 868 "analyze", 869 "current_user", 870 "and", 871 "cast", 872 "symmetric", 873 "using", 874 "order", 875 "current_catalog", 876 } 877 878 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 879 880 # DuckDB doesn't generally support CREATE TABLE .. properties 881 # https://duckdb.org/docs/sql/statements/create_table.html 882 PROPERTIES_LOCATION = { 883 prop: exp.Properties.Location.UNSUPPORTED 884 for prop in generator.Generator.PROPERTIES_LOCATION 885 } 886 887 # There are a few exceptions (e.g. 
temporary tables) which are supported or 888 # can be transpiled to DuckDB, so we explicitly override them accordingly 889 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 890 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 891 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 892 PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION 893 894 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 895 exp.FirstValue, 896 exp.Lag, 897 exp.LastValue, 898 exp.Lead, 899 exp.NthValue, 900 ) 901 902 def lambda_sql( 903 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 904 ) -> str: 905 if expression.args.get("colon"): 906 prefix = "LAMBDA " 907 arrow_sep = ":" 908 wrap = False 909 else: 910 prefix = "" 911 912 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 913 return f"{prefix}{lambda_sql}" 914 915 def show_sql(self, expression: exp.Show) -> str: 916 return f"SHOW {expression.name}" 917 918 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 919 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 920 921 def strtotime_sql(self, expression: exp.StrToTime) -> str: 922 if expression.args.get("safe"): 923 formatted_time = self.format_time(expression) 924 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 925 return str_to_time_sql(self, expression) 926 927 def strtodate_sql(self, expression: exp.StrToDate) -> str: 928 if expression.args.get("safe"): 929 formatted_time = self.format_time(expression) 930 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 931 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 932 933 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 934 arg = expression.this 935 if expression.args.get("safe"): 936 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 937 return self.func("JSON", arg) 938 939 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 940 nano = expression.args.get("nano") 941 if nano is not None: 942 expression.set( 943 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 944 ) 945 946 return rename_func("MAKE_TIME")(self, expression) 947 948 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 949 sec = expression.args["sec"] 950 951 milli = expression.args.get("milli") 952 if milli is not None: 953 sec += milli.pop() / exp.Literal.number(1000.0) 954 955 nano = expression.args.get("nano") 956 if nano is not None: 957 sec += nano.pop() / exp.Literal.number(1000000000.0) 958 959 if milli or nano: 960 expression.set("sec", sec) 961 962 return rename_func("MAKE_TIMESTAMP")(self, expression) 963 964 def tablesample_sql( 965 self, 966 expression: exp.TableSample, 967 tablesample_keyword: t.Optional[str] = None, 968 ) -> str: 969 if not isinstance(expression.parent, exp.Select): 970 # This sample clause only applies to a single source, not the entire resulting relation 971 tablesample_keyword = "TABLESAMPLE" 972 973 if expression.args.get("size"): 974 method = expression.args.get("method") 975 if method and method.name.upper() != "RESERVOIR": 976 self.unsupported( 977 f"Sampling method {method} is not supported with a discrete sample count, " 978 "defaulting to reservoir sampling" 979 ) 980 expression.set("method", exp.var("RESERVOIR")) 981 982 return 
super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 983 984 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 985 if isinstance(expression.parent, exp.UserDefinedFunction): 986 return self.sql(expression, "this") 987 return super().columndef_sql(expression, sep) 988 989 def join_sql(self, expression: exp.Join) -> str: 990 if ( 991 expression.side == "LEFT" 992 and not expression.args.get("on") 993 and isinstance(expression.this, exp.Unnest) 994 ): 995 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 996 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 997 return super().join_sql(expression.on(exp.true())) 998 999 return super().join_sql(expression) 1000 1001 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1002 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1003 if expression.args.get("is_end_exclusive"): 1004 return rename_func("RANGE")(self, expression) 1005 1006 return self.function_fallback_sql(expression) 1007 1008 def countif_sql(self, expression: exp.CountIf) -> str: 1009 if self.dialect.version >= Version("1.2"): 1010 return self.function_fallback_sql(expression) 1011 1012 # https://github.com/tobymao/sqlglot/pull/4749 1013 return count_if_to_sum(self, expression) 1014 1015 def bracket_sql(self, expression: exp.Bracket) -> str: 1016 if self.dialect.version >= Version("1.2"): 1017 return super().bracket_sql(expression) 1018 1019 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1020 this = expression.this 1021 if isinstance(this, exp.Array): 1022 this.replace(exp.paren(this)) 1023 1024 bracket = super().bracket_sql(expression) 1025 1026 if not expression.args.get("returns_list_for_maps"): 1027 if not this.type: 1028 from sqlglot.optimizer.annotate_types import annotate_types 1029 1030 this = annotate_types(this, dialect=self.dialect) 1031 1032 if this.is_type(exp.DataType.Type.MAP): 1033 bracket = f"({bracket})[1]" 1034 1035 return bracket 1036 1037 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1038 expression_sql = self.sql(expression, "expression") 1039 1040 func = expression.this 1041 if isinstance(func, exp.PERCENTILES): 1042 # Make the order key the first arg and slide the fraction to the right 1043 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1044 order_col = expression.find(exp.Ordered) 1045 if order_col: 1046 func.set("expression", func.this) 1047 func.set("this", order_col.this) 1048 1049 this = self.sql(expression, "this").rstrip(")") 1050 1051 return f"{this}{expression_sql})" 1052 1053 def length_sql(self, expression: exp.Length) -> str: 1054 arg = expression.this 1055 1056 # Dialects like BQ and Snowflake also accept binary values as args, so 1057 # DDB will attempt to infer the type or resort to case/when resolution 1058 if not expression.args.get("binary") or arg.is_string: 1059 return self.func("LENGTH", arg) 1060 1061 if not arg.type: 1062 from sqlglot.optimizer.annotate_types import annotate_types 1063 1064 arg = annotate_types(arg, dialect=self.dialect) 1065 1066 if arg.is_type(*exp.DataType.TEXT_TYPES): 1067 return self.func("LENGTH", arg) 1068 1069 # We need these casts to make duckdb's static type checker happy 1070 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1071 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1072 1073 case = ( 1074 exp.case(self.func("TYPEOF", arg)) 1075 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1076 .else_( 1077 
exp.Anonymous(this="LENGTH", expressions=[varchar]) 1078 ) # anonymous to break length_sql recursion 1079 ) 1080 1081 return self.sql(case) 1082 1083 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1084 this = expression.this 1085 key = expression.args.get("key") 1086 key_sql = key.name if isinstance(key, exp.Expression) else "" 1087 value_sql = self.sql(expression, "value") 1088 1089 kv_sql = f"{key_sql} := {value_sql}" 1090 1091 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1092 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1093 if isinstance(this, exp.Struct) and not this.expressions: 1094 return self.func("STRUCT_PACK", kv_sql) 1095 1096 return self.func("STRUCT_INSERT", this, kv_sql) 1097 1098 def unnest_sql(self, expression: exp.Unnest) -> str: 1099 explode_array = expression.args.get("explode_array") 1100 if explode_array: 1101 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1102 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1103 expression.expressions.append( 1104 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1105 ) 1106 1107 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1108 alias = expression.args.get("alias") 1109 if isinstance(alias, exp.TableAlias): 1110 expression.set("alias", None) 1111 if alias.columns: 1112 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1113 1114 unnest_sql = super().unnest_sql(expression) 1115 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1116 return self.sql(select) 1117 1118 return super().unnest_sql(expression) 1119 1120 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1121 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1122 # DuckDB should render IGNORE NULLS only for the general-purpose 1123 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1124 return super().ignorenulls_sql(expression) 1125 1126 if not isinstance(expression.this, exp.AnyValue): 1127 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1128 1129 return self.sql(expression, "this") 1130 1131 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1132 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1133 # DuckDB should render RESPECT NULLS only for the general-purpose 1134 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1135 return super().respectnulls_sql(expression) 1136 1137 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1138 return self.sql(expression, "this") 1139 1140 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1141 this = self.sql(expression, "this") 1142 null_text = self.sql(expression, "null") 1143 1144 if null_text: 1145 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1146 1147 return self.func("ARRAY_TO_STRING", this, expression.expression) 1148 1149 @unsupported_args("position", "occurrence") 1150 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1151 group = expression.args.get("group") 1152 params = expression.args.get("parameters") 1153 1154 # Do not render group if there is no following argument, 1155 # and it's the default value for this dialect 1156 if ( 1157 not params 1158 and group 1159 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1160 ): 1161 group = None 1162 return self.func( 1163 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1164 ) 1165 1166 @unsupported_args("culture") 1167 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1168 fmt = expression.args.get("format") 1169 if fmt and fmt.is_int: 1170 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1171 1172 self.unsupported("Only integer formats are supported by NumberToStr") 1173 return self.function_fallback_sql(expression) 1174 1175 def autoincrementcolumnconstraint_sql(self, _) -> str: 1176 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1177 return "" 1178 1179 def aliases_sql(self, expression: exp.Aliases) -> str: 1180 this = expression.this 1181 if isinstance(this, exp.Posexplode): 1182 return self.posexplode_sql(this) 1183 1184 return super().aliases_sql(expression) 1185 1186 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1187 this = expression.this 1188 parent = expression.parent 1189 1190 # The default Spark aliases are "pos" and "col", unless specified otherwise 1191 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1192 1193 if isinstance(parent, exp.Aliases): 1194 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1195 pos, col = parent.expressions 1196 elif isinstance(parent, exp.Table): 1197 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1198 alias = parent.args.get("alias") 1199 if alias: 1200 pos, col = alias.columns or [pos, col] 1201 alias.pop() 1202 1203 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1204 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1205 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1206 gen_subscripts = self.sql( 1207 exp.Alias( 1208 this=exp.Anonymous( 1209 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1210 ) 1211 - exp.Literal.number(1), 1212 alias=pos, 1213 ) 1214 ) 1215 1216 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1217 1218 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1219 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1220 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1221 1222 return posexplode_sql 1223 1224 def addmonths_sql(self, expression: exp.AddMonths) -> str: 1225 this = expression.this 1226 1227 if not this.type: 1228 from sqlglot.optimizer.annotate_types import annotate_types 1229 1230 this = 
annotate_types(this, dialect=self.dialect) 1231 1232 if this.is_type(*exp.DataType.TEXT_TYPES): 1233 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1234 1235 func = self.func( 1236 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1237 ) 1238 1239 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1240 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1241 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1242 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1243 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1244 return self.sql(exp.Cast(this=func, to=this.type)) 1245 1246 return self.sql(func)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, as in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this is instead interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability.
Specifies the strategy according to which identifiers should be normalized.
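As a quick sketch of how a couple of these dialect-level settings surface through the top-level API (the exact generated strings are indicative, not guaranteed):

import sqlglot

# Number literals with underscores are accepted by the DuckDB tokenizer
expression = sqlglot.parse_one("SELECT 1_000_000", read="duckdb")

# ORDER BY ALL is supported, so it parses and round-trips
print(sqlglot.parse_one("SELECT a, b FROM t ORDER BY ALL", read="duckdb").sql(dialect="duckdb"))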
275 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 276 if isinstance(path, exp.Literal): 277 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 278 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 279 # This check ensures we'll avoid trying to parse these as JSON paths, which can 280 # either result in a noisy warning or in an invalid representation of the path. 281 path_text = path.name 282 if path_text.startswith("/") or "[#" in path_text: 283 return path 284 285 return super().to_json_path(path)
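A hedged illustration of the special-casing above: a leading / (JSON pointer syntax) or a [#-i] back-of-list access means the literal is returned verbatim instead of being parsed as a JSON path (outputs indicative):

import sqlglot

# '/a/b' is a JSON pointer, so to_json_path keeps the literal untouched
print(sqlglot.transpile("SELECT col -> '/a/b' FROM t", read="duckdb", write="duckdb")[0])

# '$.a.b' has no pointer markers and goes through regular JSON path parsing
print(sqlglot.transpile("SELECT col -> '$.a.b' FROM t", read="duckdb", write="duckdb")[0])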
287 class Tokenizer(tokens.Tokenizer): 288 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 289 HEREDOC_STRINGS = ["$"] 290 291 HEREDOC_TAG_IS_IDENTIFIER = True 292 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 293 294 KEYWORDS = { 295 **tokens.Tokenizer.KEYWORDS, 296 "//": TokenType.DIV, 297 "**": TokenType.DSTAR, 298 "^@": TokenType.CARET_AT, 299 "@>": TokenType.AT_GT, 300 "<@": TokenType.LT_AT, 301 "ATTACH": TokenType.ATTACH, 302 "BINARY": TokenType.VARBINARY, 303 "BITSTRING": TokenType.BIT, 304 "BPCHAR": TokenType.TEXT, 305 "CHAR": TokenType.TEXT, 306 "DATETIME": TokenType.TIMESTAMPNTZ, 307 "DETACH": TokenType.DETACH, 308 "EXCLUDE": TokenType.EXCEPT, 309 "LOGICAL": TokenType.BOOLEAN, 310 "ONLY": TokenType.ONLY, 311 "PIVOT_WIDER": TokenType.PIVOT, 312 "POSITIONAL": TokenType.POSITIONAL, 313 "RESET": TokenType.COMMAND, 314 "SIGNED": TokenType.INT, 315 "STRING": TokenType.TEXT, 316 "SUMMARIZE": TokenType.SUMMARIZE, 317 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 318 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 319 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 320 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 321 "TIMESTAMP_US": TokenType.TIMESTAMP, 322 "UBIGINT": TokenType.UBIGINT, 323 "UINTEGER": TokenType.UINT, 324 "USMALLINT": TokenType.USMALLINT, 325 "UTINYINT": TokenType.UTINYINT, 326 "VARCHAR": TokenType.TEXT, 327 } 328 KEYWORDS.pop("/*+") 329 330 SINGLE_TOKENS = { 331 **tokens.Tokenizer.SINGLE_TOKENS, 332 "$": TokenType.PARAMETER, 333 } 334 335 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
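For instance, the custom operator keywords above can be seen directly in the token stream (a sketch using the top-level tokenize/parse_one helpers):

import sqlglot

# '//' tokenizes as integer division (TokenType.DIV) and '**' as TokenType.DSTAR
tokens = sqlglot.tokenize("SELECT 7 // 2, 2 ** 3", read="duckdb")
print([token.token_type for token in tokens])

# When parsed, '//' should become exp.IntDiv and '**' exp.Pow
expression = sqlglot.parse_one("SELECT 7 // 2, 2 ** 3", read="duckdb")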
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
337 class Parser(parser.Parser): 338 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True 339 340 BITWISE = { 341 **parser.Parser.BITWISE, 342 TokenType.TILDA: exp.RegexpLike, 343 } 344 BITWISE.pop(TokenType.CARET) 345 346 RANGE_PARSERS = { 347 **parser.Parser.RANGE_PARSERS, 348 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 349 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 350 } 351 352 EXPONENT = { 353 **parser.Parser.EXPONENT, 354 TokenType.CARET: exp.Pow, 355 TokenType.DSTAR: exp.Pow, 356 } 357 358 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 359 360 SHOW_PARSERS = { 361 "TABLES": _show_parser("TABLES"), 362 "ALL TABLES": _show_parser("ALL TABLES"), 363 } 364 365 FUNCTIONS = { 366 **parser.Parser.FUNCTIONS, 367 "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)), 368 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 369 "ARRAY_SORT": exp.SortArray.from_arg_list, 370 "DATEDIFF": _build_date_diff, 371 "DATE_DIFF": _build_date_diff, 372 "DATE_TRUNC": date_trunc_to_time, 373 "DATETRUNC": date_trunc_to_time, 374 "DECODE": lambda args: exp.Decode( 375 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 376 ), 377 "EDITDIST3": exp.Levenshtein.from_arg_list, 378 "ENCODE": lambda args: exp.Encode( 379 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 380 ), 381 "EPOCH": exp.TimeToUnix.from_arg_list, 382 "EPOCH_MS": lambda args: exp.UnixToTime( 383 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 384 ), 385 "GENERATE_SERIES": _build_generate_series(), 386 "JSON": exp.ParseJSON.from_arg_list, 387 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 388 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 389 "LIST_CONTAINS": exp.ArrayContains.from_arg_list, 390 "LIST_HAS": exp.ArrayContains.from_arg_list, 391 "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list, 392 "LIST_REVERSE_SORT": _build_sort_array_desc, 393 "LIST_SORT": exp.SortArray.from_arg_list, 394 "LIST_VALUE": lambda args: exp.Array(expressions=args), 395 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 396 "MAKE_TIMESTAMP": _build_make_timestamp, 397 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 398 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 399 "RANGE": _build_generate_series(end_exclusive=True), 400 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 401 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 402 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 403 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 404 this=seq_get(args, 0), 405 expression=seq_get(args, 1), 406 replacement=seq_get(args, 2), 407 modifiers=seq_get(args, 3), 408 ), 409 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 410 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 411 "STRING_SPLIT": exp.Split.from_arg_list, 412 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 413 "STRING_TO_ARRAY": exp.Split.from_arg_list, 414 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 415 "STRUCT_PACK": exp.Struct.from_arg_list, 416 "STR_SPLIT": exp.Split.from_arg_list, 417 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 418 "TIME_BUCKET": exp.DateBin.from_arg_list, 419 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 420 "UNNEST": exp.Explode.from_arg_list, 421 "XOR": binary_from_function(exp.BitwiseXor), 422 } 423 424 FUNCTIONS.pop("DATE_SUB") 425 FUNCTIONS.pop("GLOB") 426 427 FUNCTION_PARSERS = { 428 **parser.Parser.FUNCTION_PARSERS, 429 
**dict.fromkeys( 430 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 431 ), 432 } 433 FUNCTION_PARSERS.pop("DECODE") 434 435 NO_PAREN_FUNCTION_PARSERS = { 436 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 437 "MAP": lambda self: self._parse_map(), 438 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 439 } 440 441 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 442 TokenType.SEMI, 443 TokenType.ANTI, 444 } 445 446 PLACEHOLDER_PARSERS = { 447 **parser.Parser.PLACEHOLDER_PARSERS, 448 TokenType.PARAMETER: lambda self: ( 449 self.expression(exp.Placeholder, this=self._prev.text) 450 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 451 else None 452 ), 453 } 454 455 TYPE_CONVERTERS = { 456 # https://duckdb.org/docs/sql/data_types/numeric 457 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 458 # https://duckdb.org/docs/sql/data_types/text 459 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 460 } 461 462 STATEMENT_PARSERS = { 463 **parser.Parser.STATEMENT_PARSERS, 464 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 465 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 466 TokenType.SHOW: lambda self: self._parse_show(), 467 } 468 469 SET_PARSERS = { 470 **parser.Parser.SET_PARSERS, 471 "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"), 472 } 473 474 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 475 index = self._index 476 if not self._match_text_seq("LAMBDA"): 477 return super()._parse_lambda(alias=alias) 478 479 expressions = self._parse_csv(self._parse_lambda_arg) 480 if not self._match(TokenType.COLON): 481 self._retreat(index) 482 return None 483 484 this = self._replace_lambda(self._parse_assignment(), expressions) 485 return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True) 486 487 def _parse_expression(self) -> t.Optional[exp.Expression]: 488 # DuckDB supports prefix aliases, e.g. foo: 1 489 if self._next and self._next.token_type == TokenType.COLON: 490 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 491 self._match(TokenType.COLON) 492 comments = self._prev_comments or [] 493 494 this = self._parse_assignment() 495 if isinstance(this, exp.Expression): 496 # Moves the comment next to the alias in `alias: expr /* comment */` 497 comments += this.pop_comments() or [] 498 499 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 500 501 return super()._parse_expression() 502 503 def _parse_table( 504 self, 505 schema: bool = False, 506 joins: bool = False, 507 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 508 parse_bracket: bool = False, 509 is_db_reference: bool = False, 510 parse_partition: bool = False, 511 consume_pipe: bool = False, 512 ) -> t.Optional[exp.Expression]: 513 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 514 if self._next and self._next.token_type == TokenType.COLON: 515 alias = self._parse_table_alias( 516 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 517 ) 518 self._match(TokenType.COLON) 519 comments = self._prev_comments or [] 520 else: 521 alias = None 522 comments = [] 523 524 table = super()._parse_table( 525 schema=schema, 526 joins=joins, 527 alias_tokens=alias_tokens, 528 parse_bracket=parse_bracket, 529 is_db_reference=is_db_reference, 530 parse_partition=parse_partition, 531 ) 532 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 533 # Moves the comment next to the alias in `alias: table /* comment */` 534 comments += table.pop_comments() or [] 535 alias.comments = alias.pop_comments() + comments 536 table.set("alias", alias) 537 538 return table 539 540 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 541 # https://duckdb.org/docs/sql/samples.html 542 sample = super()._parse_table_sample(as_modifier=as_modifier) 543 if sample and not sample.args.get("method"): 544 if sample.args.get("size"): 545 sample.set("method", exp.var("RESERVOIR")) 546 else: 547 sample.set("method", exp.var("SYSTEM")) 548 549 return sample 550 551 def _parse_bracket( 552 self, this: t.Optional[exp.Expression] = None 553 ) -> t.Optional[exp.Expression]: 554 bracket = super()._parse_bracket(this) 555 556 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 557 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 558 bracket.set("returns_list_for_maps", True) 559 560 return bracket 561 562 def _parse_map(self) -> exp.ToMap | exp.Map: 563 if self._match(TokenType.L_BRACE, advance=False): 564 return self.expression(exp.ToMap, this=self._parse_bracket()) 565 566 args = self._parse_wrapped_csv(self._parse_assignment) 567 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 568 569 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 570 return self._parse_field_def() 571 572 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 573 if len(aggregations) == 1: 574 return super()._pivot_column_names(aggregations) 575 return pivot_column_names(aggregations, dialect="duckdb") 576 577 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 578 def _parse_attach_option() -> exp.AttachOption: 579 return self.expression( 580 exp.AttachOption, 581 this=self._parse_var(any_token=True), 582 expression=self._parse_field(any_token=True), 583 ) 584 585 self._match(TokenType.DATABASE) 586 exists = self._parse_exists(not_=is_attach) 587 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 588 589 if self._match(TokenType.L_PAREN, advance=False): 590 expressions = self._parse_wrapped_csv(_parse_attach_option) 591 else: 592 expressions = None 593 594 return ( 595 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 596 if is_attach 597 else self.expression(exp.Detach, this=this, exists=exists) 598 ) 599 600 def _parse_show_duckdb(self, this: str) -> exp.Show: 601 return self.expression(exp.Show, this=this) 602 603 def _parse_primary(self) -> t.Optional[exp.Expression]: 604 if self._match_pair(TokenType.HASH, TokenType.NUMBER): 605 return exp.PositionalColumn(this=exp.Literal.number(self._prev.text)) 606 607 return super()._parse_primary()
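A short usage sketch of two of the parser behaviours above, prefix aliases and the end-exclusive RANGE (outputs indicative):

import sqlglot
from sqlglot import exp

# Prefix aliases: `alias: expr` and `alias: table` parse into regular aliases
print(sqlglot.transpile("SELECT foo: 1 FROM bar: tbl", read="duckdb", write="duckdb")[0])

# RANGE(0, 5) is parsed as an end-exclusive GENERATE_SERIES
expression = sqlglot.parse_one("SELECT RANGE(0, 5)", read="duckdb")
print(expression.find(exp.GenerateSeries).args.get("is_end_exclusive"))  # True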
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
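For example (a minimal sketch; parse_one forwards parser options such as error_level):

import sqlglot
from sqlglot.errors import ErrorLevel

# Warn and collect parse errors instead of raising on the first one
expression = sqlglot.parse_one("SELECT 1", read="duckdb", error_level=ErrorLevel.WARN)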
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
609 class Generator(generator.Generator): 610 PARAMETER_TOKEN = "$" 611 NAMED_PLACEHOLDER_TOKEN = "$" 612 JOIN_HINTS = False 613 TABLE_HINTS = False 614 QUERY_HINTS = False 615 LIMIT_FETCH = "LIMIT" 616 STRUCT_DELIMITER = ("(", ")") 617 RENAME_TABLE_WITH_DB = False 618 NVL2_SUPPORTED = False 619 SEMI_ANTI_JOIN_WITH_SIDE = False 620 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 621 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 622 LAST_DAY_SUPPORTS_DATE_PART = False 623 JSON_KEY_VALUE_PAIR_SEP = "," 624 IGNORE_NULLS_IN_FUNC = True 625 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 626 SUPPORTS_CREATE_TABLE_LIKE = False 627 MULTI_ARG_DISTINCT = False 628 CAN_IMPLEMENT_ARRAY_ANY = True 629 SUPPORTS_TO_NUMBER = False 630 SUPPORTS_WINDOW_EXCLUDE = True 631 COPY_HAS_INTO_KEYWORD = False 632 STAR_EXCEPT = "EXCLUDE" 633 PAD_FILL_PATTERN_IS_REQUIRED = True 634 ARRAY_CONCAT_IS_VAR_LEN = False 635 ARRAY_SIZE_DIM_REQUIRED = False 636 NORMALIZE_EXTRACT_DATE_PARTS = True 637 SUPPORTS_LIKE_QUANTIFIERS = False 638 639 TRANSFORMS = { 640 **generator.Generator.TRANSFORMS, 641 exp.ApproxDistinct: approx_count_distinct_sql, 642 exp.Array: inline_array_unless_query, 643 exp.ArrayFilter: rename_func("LIST_FILTER"), 644 exp.ArrayRemove: remove_from_array_using_filter, 645 exp.ArraySort: _array_sort_sql, 646 exp.ArraySum: rename_func("LIST_SUM"), 647 exp.BitwiseXor: rename_func("XOR"), 648 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 649 exp.CurrentDate: lambda *_: "CURRENT_DATE", 650 exp.CurrentTime: lambda *_: "CURRENT_TIME", 651 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 652 exp.DayOfMonth: rename_func("DAYOFMONTH"), 653 exp.DayOfWeek: rename_func("DAYOFWEEK"), 654 exp.DayOfWeekIso: rename_func("ISODOW"), 655 exp.DayOfYear: rename_func("DAYOFYEAR"), 656 exp.DataType: _datatype_sql, 657 exp.Date: _date_sql, 658 exp.DateAdd: date_delta_to_binary_interval_op(), 659 exp.DateFromParts: rename_func("MAKE_DATE"), 660 exp.DateSub: date_delta_to_binary_interval_op(), 661 exp.DateDiff: _date_diff_sql, 662 exp.DateStrToDate: datestrtodate_sql, 663 exp.Datetime: no_datetime_sql, 664 exp.DatetimeSub: date_delta_to_binary_interval_op(), 665 exp.DatetimeAdd: date_delta_to_binary_interval_op(), 666 exp.DateToDi: lambda self, 667 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 668 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 669 exp.DiToDate: lambda self, 670 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 671 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 672 exp.GenerateDateArray: _generate_datetime_array_sql, 673 exp.GenerateTimestampArray: _generate_datetime_array_sql, 674 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 675 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 676 exp.Explode: rename_func("UNNEST"), 677 exp.IntDiv: lambda self, e: self.binary(e, "//"), 678 exp.IsInf: rename_func("ISINF"), 679 exp.IsNan: rename_func("ISNAN"), 680 exp.JSONBExists: rename_func("JSON_EXISTS"), 681 exp.JSONExtract: _arrow_json_extract_sql, 682 exp.JSONExtractArray: _json_extract_value_array_sql, 683 exp.JSONExtractScalar: _arrow_json_extract_sql, 684 exp.JSONFormat: _json_format_sql, 685 exp.JSONValueArray: _json_extract_value_array_sql, 686 exp.Lateral: explode_to_unnest_sql, 687 exp.LogicalOr: rename_func("BOOL_OR"), 688 exp.LogicalAnd: rename_func("BOOL_AND"), 689 exp.MakeInterval: lambda self, e: 
no_make_interval_sql(self, e, sep=" "), 690 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 691 exp.MonthsBetween: lambda self, e: self.func( 692 "DATEDIFF", 693 "'month'", 694 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 695 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 696 ), 697 exp.PercentileCont: rename_func("QUANTILE_CONT"), 698 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 699 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 700 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 701 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 702 exp.RegexpReplace: lambda self, e: self.func( 703 "REGEXP_REPLACE", 704 e.this, 705 e.expression, 706 e.args.get("replacement"), 707 e.args.get("modifiers"), 708 ), 709 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 710 exp.RegexpILike: lambda self, e: self.func( 711 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 712 ), 713 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 714 exp.Return: lambda self, e: self.sql(e, "this"), 715 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 716 exp.Rand: rename_func("RANDOM"), 717 exp.SHA: rename_func("SHA1"), 718 exp.SHA2: sha256_sql, 719 exp.Split: rename_func("STR_SPLIT"), 720 exp.SortArray: _sort_array_sql, 721 exp.StrPosition: strposition_sql, 722 exp.StrToUnix: lambda self, e: self.func( 723 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 724 ), 725 exp.Struct: _struct_sql, 726 exp.Transform: rename_func("LIST_TRANSFORM"), 727 exp.TimeAdd: date_delta_to_binary_interval_op(), 728 exp.Time: no_time_sql, 729 exp.TimeDiff: _timediff_sql, 730 exp.Timestamp: no_timestamp_sql, 731 exp.TimestampDiff: lambda self, e: self.func( 732 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 733 ), 734 exp.TimestampTrunc: timestamptrunc_sql(), 735 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 736 exp.TimeStrToTime: timestrtotime_sql, 737 exp.TimeStrToUnix: lambda self, e: self.func( 738 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 739 ), 740 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 741 exp.TimeToUnix: rename_func("EPOCH"), 742 exp.TsOrDiToDi: lambda self, 743 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 744 exp.TsOrDsAdd: date_delta_to_binary_interval_op(), 745 exp.TsOrDsDiff: lambda self, e: self.func( 746 "DATE_DIFF", 747 f"'{e.args.get('unit') or 'DAY'}'", 748 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 749 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 750 ), 751 exp.UnixToStr: lambda self, e: self.func( 752 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 753 ), 754 exp.DatetimeTrunc: lambda self, e: self.func( 755 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 756 ), 757 exp.UnixToTime: _unix_to_time_sql, 758 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 759 exp.VariancePop: rename_func("VAR_POP"), 760 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 761 exp.Xor: bool_xor_sql, 762 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 763 rename_func("LEVENSHTEIN") 764 ), 765 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 766 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 767 exp.DateBin: 
rename_func("TIME_BUCKET"), 768 } 769 770 SUPPORTED_JSON_PATH_PARTS = { 771 exp.JSONPathKey, 772 exp.JSONPathRoot, 773 exp.JSONPathSubscript, 774 exp.JSONPathWildcard, 775 } 776 777 TYPE_MAPPING = { 778 **generator.Generator.TYPE_MAPPING, 779 exp.DataType.Type.BINARY: "BLOB", 780 exp.DataType.Type.BPCHAR: "TEXT", 781 exp.DataType.Type.CHAR: "TEXT", 782 exp.DataType.Type.DATETIME: "TIMESTAMP", 783 exp.DataType.Type.FLOAT: "REAL", 784 exp.DataType.Type.JSONB: "JSON", 785 exp.DataType.Type.NCHAR: "TEXT", 786 exp.DataType.Type.NVARCHAR: "TEXT", 787 exp.DataType.Type.UINT: "UINTEGER", 788 exp.DataType.Type.VARBINARY: "BLOB", 789 exp.DataType.Type.ROWVERSION: "BLOB", 790 exp.DataType.Type.VARCHAR: "TEXT", 791 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 792 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 793 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 794 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 795 } 796 797 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 798 RESERVED_KEYWORDS = { 799 "array", 800 "analyse", 801 "union", 802 "all", 803 "when", 804 "in_p", 805 "default", 806 "create_p", 807 "window", 808 "asymmetric", 809 "to", 810 "else", 811 "localtime", 812 "from", 813 "end_p", 814 "select", 815 "current_date", 816 "foreign", 817 "with", 818 "grant", 819 "session_user", 820 "or", 821 "except", 822 "references", 823 "fetch", 824 "limit", 825 "group_p", 826 "leading", 827 "into", 828 "collate", 829 "offset", 830 "do", 831 "then", 832 "localtimestamp", 833 "check_p", 834 "lateral_p", 835 "current_role", 836 "where", 837 "asc_p", 838 "placing", 839 "desc_p", 840 "user", 841 "unique", 842 "initially", 843 "column", 844 "both", 845 "some", 846 "as", 847 "any", 848 "only", 849 "deferrable", 850 "null_p", 851 "current_time", 852 "true_p", 853 "table", 854 "case", 855 "trailing", 856 "variadic", 857 "for", 858 "on", 859 "distinct", 860 "false_p", 861 "not", 862 "constraint", 863 "current_timestamp", 864 "returning", 865 "primary", 866 "intersect", 867 "having", 868 "analyze", 869 "current_user", 870 "and", 871 "cast", 872 "symmetric", 873 "using", 874 "order", 875 "current_catalog", 876 } 877 878 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 879 880 # DuckDB doesn't generally support CREATE TABLE .. properties 881 # https://duckdb.org/docs/sql/statements/create_table.html 882 PROPERTIES_LOCATION = { 883 prop: exp.Properties.Location.UNSUPPORTED 884 for prop in generator.Generator.PROPERTIES_LOCATION 885 } 886 887 # There are a few exceptions (e.g. 
    # ... temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
    PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION

    IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
        exp.FirstValue,
        exp.Lag,
        exp.LastValue,
        exp.Lead,
        exp.NthValue,
    )

    def lambda_sql(
        self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
    ) -> str:
        if expression.args.get("colon"):
            prefix = "LAMBDA "
            arrow_sep = ":"
            wrap = False
        else:
            prefix = ""

        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
        return f"{prefix}{lambda_sql}"

    def show_sql(self, expression: exp.Show) -> str:
        return f"SHOW {expression.name}"

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
        return str_to_time_sql(self, expression)

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
        return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            expression.side == "LEFT"
            and not expression.args.get("on")
            and isinstance(expression.this, exp.Unnest)
        ):
            # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            return super().join_sql(expression.on(exp.true()))

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def countif_sql(self, expression: exp.CountIf) -> str:
        if self.dialect.version >= Version("1.2"):
            return self.function_fallback_sql(expression)

        # https://github.com/tobymao/sqlglot/pull/4749
        return count_if_to_sum(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if self.dialect.version >= Version("1.2"):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DataType.Type.MAP):
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            .else_(
                exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
        )

        return self.sql(case)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                expression.set("alias", None)
                if alias.columns:
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        if not isinstance(expression.this, exp.AnyValue):
            self.unsupported("IGNORE NULLS is not supported for non-window functions.")

        return self.sql(expression, "this")

    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render RESPECT NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
            return super().respectnulls_sql(expression)

        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")

    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        this = self.sql(expression, "this")
        null_text = self.sql(expression, "null")

        if null_text:
            this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

        return self.func("ARRAY_TO_STRING", this, expression.expression)

    @unsupported_args("position", "occurrence")
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        group = expression.args.get("group")
        params = expression.args.get("parameters")

        # Do not render group if there is no following argument,
        # and it's the default value for this dialect
        if (
            not params
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None
        return self.func(
            "REGEXP_EXTRACT", expression.this, expression.expression, group, params
        )

    @unsupported_args("culture")
    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
        fmt = expression.args.get("format")
        if fmt and fmt.is_int:
            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

        self.unsupported("Only integer formats are supported by NumberToStr")
        return self.function_fallback_sql(expression)

    def autoincrementcolumnconstraint_sql(self, _) -> str:
        self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
        return ""

    def aliases_sql(self, expression: exp.Aliases) -> str:
        this = expression.this
        if isinstance(this, exp.Posexplode):
            return self.posexplode_sql(this)

        return super().aliases_sql(expression)

    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql

    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

        func = self.func(
            "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH"))
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=func, to=this.type))

        return self.sql(func)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
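These options are forwarded through sqlglot's top-level API, so a Generator rarely needs to be constructed by hand. A minimal sketch (the commented output is illustrative):

import sqlglot

# pretty/identify/normalize etc. are passed straight through to the Generator
sql = "select a, b from my_table where a > 1"
print(sqlglot.transpile(sql, write="duckdb", pretty=True)[0])
# SELECT
#   a,
#   b
# FROM my_table
# WHERE
#   a > 1

# identify=True quotes every identifier
print(sqlglot.parse_one(sql).sql(dialect="duckdb", identify=True))
# SELECT "a", "b" FROM "my_table" WHERE "a" > 1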
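To see the lambda_sql override above in action, a hedged round-trip through sqlglot's public transpile API; the colon branch covers DuckDB's Python-style lambda syntax, and the commented output is approximate:

import sqlglot

# Arrow lambdas round-trip through the DuckDB dialect
sql = "SELECT LIST_TRANSFORM([1, 2, 3], x -> x + 1)"
print(sqlglot.transpile(sql, read="duckdb", write="duckdb")[0])
# roughly: SELECT LIST_TRANSFORM([1, 2, 3], x -> x + 1)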
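A sketch of the strtodate_sql path above, assuming BigQuery's SAFE.PARSE_DATE sets the safe flag on exp.StrToDate:

import sqlglot

print(sqlglot.transpile("SELECT SAFE.PARSE_DATE('%Y-%m-%d', s)", read="bigquery", write="duckdb")[0])
# roughly: SELECT CAST(TRY_STRPTIME(s, '%Y-%m-%d') AS DATE)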
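The timefromparts_sql and timestampfromparts_sql overrides above surface when transpiling component constructors, e.g. from Snowflake; a hedged sketch:

import sqlglot

print(sqlglot.transpile("SELECT TIME_FROM_PARTS(12, 34, 56)", read="snowflake", write="duckdb")[0])
# roughly: SELECT MAKE_TIME(12, 34, 56)

print(sqlglot.transpile("SELECT TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5)", read="snowflake", write="duckdb")[0])
# roughly: SELECT MAKE_TIMESTAMP(2024, 1, 2, 3, 4, 5)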
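A sketch of the join_sql fix-up above for dialects that allow LEFT JOIN UNNEST(...) without an ON clause:

import sqlglot

# DuckDB requires an ON clause here, so the transpiled join gains an explicit ON TRUE;
# the UNNEST itself may additionally be rewritten as a derived table
print(sqlglot.transpile("SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS x", read="bigquery", write="duckdb")[0])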
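The version gates in countif_sql and bracket_sql above can be exercised by pinning a dialect version; this sketch assumes the dialect-settings syntax ("duckdb, version=...") accepts a version option:

import sqlglot

sql = "SELECT [1, 2, 3][1]"
# Pre-1.2 DuckDB had different bracket semantics, so the old path parenthesizes arrays
print(sqlglot.transpile(sql, read="duckdb", write="duckdb, version=1.0")[0])
# roughly: SELECT ([1, 2, 3])[1]
print(sqlglot.transpile(sql, read="duckdb", write="duckdb")[0])
# roughly: SELECT [1, 2, 3][1]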
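A hedged sketch of the length_sql dispatch above, using BigQuery (whose LENGTH also accepts BYTES) as the source dialect:

import sqlglot

# Without a known type, the override resorts to the TYPEOF-based case/when resolution
print(sqlglot.transpile("SELECT LENGTH(col) FROM t", read="bigquery", write="duckdb")[0])
# roughly: SELECT CASE TYPEOF(col) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(col AS BLOB))
#          ELSE LENGTH(CAST(col AS TEXT)) END FROM t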
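The objectinsert_sql branches above can be seen when transpiling Snowflake's OBJECT_INSERT; commented outputs are approximate:

import sqlglot

# Empty input struct -> STRUCT_PACK, otherwise STRUCT_INSERT
print(sqlglot.transpile("SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 'v')", read="snowflake", write="duckdb")[0])
# roughly: SELECT STRUCT_PACK(k := 'v')
print(sqlglot.transpile("SELECT OBJECT_INSERT(obj, 'k', 'v')", read="snowflake", write="duckdb")[0])
# roughly: SELECT STRUCT_INSERT(obj, k := 'v')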
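A sketch of the unnest_sql rewrite above; the exact derived-table shape may vary across sqlglot versions:

import sqlglot

# BigQuery's FROM UNNEST(...) explodes one level deeper than DuckDB's UNNEST,
# hence the max_depth => 2 rewrite behind a subquery
print(sqlglot.transpile("SELECT * FROM UNNEST([STRUCT(1 AS a)]) AS x", read="bigquery", write="duckdb")[0])
# roughly: SELECT * FROM (SELECT UNNEST([{'a': 1}], max_depth => 2) AS x)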
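A small sketch of the IGNORE/RESPECT NULLS handling above:

import sqlglot

# Kept for the window functions listed in IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS,
# dropped (with a warning) for anything else
sql = "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t"
print(sqlglot.transpile(sql, read="duckdb", write="duckdb")[0])
# roughly unchanged: SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t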
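The arraytostring_sql null-replacement rewrite above, sketched via Spark's three-argument ARRAY_JOIN:

import sqlglot

print(sqlglot.transpile("SELECT ARRAY_JOIN(a, ',', '?')", read="spark", write="duckdb")[0])
# roughly: SELECT ARRAY_TO_STRING(LIST_TRANSFORM(a, x -> COALESCE(x, '?')), ',')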
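A sketch of regexpextract_sql above: DuckDB's default group is 0, so a source dialect whose default group is 1 (e.g. BigQuery) keeps the group explicit:

import sqlglot

print(sqlglot.transpile(r"SELECT REGEXP_EXTRACT(s, r'(\d+)')", read="bigquery", write="duckdb")[0])
# roughly: SELECT REGEXP_EXTRACT(s, '(\d+)', 1)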
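A hedged sketch of numbertostr_sql above, assuming MySQL's FORMAT(number, decimals) parses to exp.NumberToStr:

import sqlglot

# Integer formats become a Python-style thousands/precision format string for DuckDB's FORMAT
print(sqlglot.transpile("SELECT FORMAT(1234567.891, 2)", read="mysql", write="duckdb")[0])
# roughly: SELECT FORMAT('{:,.2f}', 1234567.891)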
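The posexplode_sql translation above, sketched from Spark; note the subtraction that restores 0-based positions:

import sqlglot

print(sqlglot.transpile("SELECT POSEXPLODE(arr) FROM t", read="spark", write="duckdb")[0])
# roughly: SELECT GENERATE_SUBSCRIPTS(arr, 1) - 1 AS pos, UNNEST(arr) AS col FROM t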
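Finally, a sketch of addmonths_sql above, where the result is cast back to DATE to mirror Snowflake's type-preserving ADD_MONTHS:

import sqlglot

print(sqlglot.transpile("SELECT ADD_MONTHS(CAST('2023-01-31' AS DATE), 1)", read="snowflake", write="duckdb")[0])
# roughly: SELECT CAST(DATE_ADD(CAST('2023-01-31' AS DATE), INTERVAL 1 MONTH) AS DATE)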
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_BETWEEN_FLAGS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql