# sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from a flat [key1, value1, key2, value2, ...] list.

    A single star argument (e.g. ``MAP(*)``) produces a ``StarMap`` instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key/value, so consume them pairwise.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression; a third argument becomes an ESCAPE wrapper."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range-parser callback producing ``expr_type(this, <bitwise>)``, with ESCAPE support."""
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that autofills the SHOW/SET tries from each subclass's parser tables."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Name -> builder for every function the parser knows how to instantiate.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without trailing parentheses.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
FUNCTION_PARSERS = { 774 "ANY_VALUE": lambda self: self._parse_any_value(), 775 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 776 "CONCAT": lambda self: self._parse_concat(), 777 "CONCAT_WS": lambda self: self._parse_concat_ws(), 778 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 779 "DECODE": lambda self: self._parse_decode(), 780 "EXTRACT": lambda self: self._parse_extract(), 781 "JSON_OBJECT": lambda self: self._parse_json_object(), 782 "LOG": lambda self: self._parse_logarithm(), 783 "MATCH": lambda self: self._parse_match_against(), 784 "OPENJSON": lambda self: self._parse_open_json(), 785 "POSITION": lambda self: self._parse_position(), 786 "SAFE_CAST": lambda self: self._parse_cast(False), 787 "STRING_AGG": lambda self: self._parse_string_agg(), 788 "SUBSTRING": lambda self: self._parse_substring(), 789 "TRIM": lambda self: self._parse_trim(), 790 "TRY_CAST": lambda self: self._parse_cast(False), 791 "TRY_CONVERT": lambda self: self._parse_convert(False), 792 } 793 794 QUERY_MODIFIER_PARSERS = { 795 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 796 TokenType.WHERE: lambda self: ("where", self._parse_where()), 797 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 798 TokenType.HAVING: lambda self: ("having", self._parse_having()), 799 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 800 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 801 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 802 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 803 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 804 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 805 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 806 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 807 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 808 
TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 809 TokenType.CLUSTER_BY: lambda self: ( 810 "cluster", 811 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 812 ), 813 TokenType.DISTRIBUTE_BY: lambda self: ( 814 "distribute", 815 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 816 ), 817 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 818 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 819 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 820 } 821 822 SET_PARSERS = { 823 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 824 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 825 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 826 "TRANSACTION": lambda self: self._parse_set_transaction(), 827 } 828 829 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 830 831 TYPE_LITERAL_PARSERS = { 832 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 833 } 834 835 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 836 837 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 838 839 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 840 841 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 842 TRANSACTION_CHARACTERISTICS = { 843 "ISOLATION LEVEL REPEATABLE READ", 844 "ISOLATION LEVEL READ COMMITTED", 845 "ISOLATION LEVEL READ UNCOMMITTED", 846 "ISOLATION LEVEL SERIALIZABLE", 847 "READ WRITE", 848 "READ ONLY", 849 } 850 851 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 852 853 CLONE_KEYWORDS = {"CLONE", "COPY"} 854 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 855 856 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 857 858 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 859 860 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - 
{TokenType.ROWS} 861 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 862 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 863 864 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 865 866 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 867 868 DISTINCT_TOKENS = {TokenType.DISTINCT} 869 870 NULL_TOKENS = {TokenType.NULL} 871 872 STRICT_CAST = True 873 874 # A NULL arg in CONCAT yields NULL by default 875 CONCAT_NULL_OUTPUTS_STRING = False 876 877 PREFIXED_PIVOT_COLUMNS = False 878 IDENTIFY_PIVOT_STRINGS = False 879 880 LOG_BASE_FIRST = True 881 LOG_DEFAULTS_TO_LN = False 882 883 # Whether or not ADD is present for each column added by ALTER TABLE 884 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 885 886 # Whether or not the table sample clause expects CSV syntax 887 TABLESAMPLE_CSV = False 888 889 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments. 890 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 891 892 __slots__ = ( 893 "error_level", 894 "error_message_context", 895 "max_errors", 896 "sql", 897 "errors", 898 "_tokens", 899 "_index", 900 "_curr", 901 "_next", 902 "_prev", 903 "_prev_comments", 904 "_tokenizer", 905 ) 906 907 # Autofilled 908 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 909 INDEX_OFFSET: int = 0 910 UNNEST_COLUMN_ONLY: bool = False 911 ALIAS_POST_TABLESAMPLE: bool = False 912 STRICT_STRING_CONCAT = False 913 SUPPORTS_USER_DEFINED_TYPES = True 914 NORMALIZE_FUNCTIONS = "upper" 915 NULL_ORDERING: str = "nulls_are_small" 916 SHOW_TRIE: t.Dict = {} 917 SET_TRIE: t.Dict = {} 918 FORMAT_MAPPING: t.Dict[str, str] = {} 919 FORMAT_TRIE: t.Dict = {} 920 TIME_MAPPING: t.Dict[str, str] = {} 921 TIME_TRIE: t.Dict = {} 922 923 def __init__( 924 self, 925 error_level: t.Optional[ErrorLevel] = None, 926 error_message_context: int = 100, 927 max_errors: int = 3, 928 ): 929 self.error_level = error_level or ErrorLevel.IMMEDIATE 930 self.error_message_context = error_message_context 
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Resets all mutable parsing state so the same instance can parse another SQL string."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failed attempt was for, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies `parse_method` to each statement chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not open a new (empty) statement chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending
on the chosen 1048 error level setting. 1049 """ 1050 token = token or self._curr or self._prev or Token.string("") 1051 start = token.start 1052 end = token.end + 1 1053 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1054 highlight = self.sql[start:end] 1055 end_context = self.sql[end : end + self.error_message_context] 1056 1057 error = ParseError.new( 1058 f"{message}. Line {token.line}, Col: {token.col}.\n" 1059 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1060 description=message, 1061 line=token.line, 1062 col=token.col, 1063 start_context=start_context, 1064 highlight=highlight, 1065 end_context=end_context, 1066 ) 1067 1068 if self.error_level == ErrorLevel.IMMEDIATE: 1069 raise error 1070 1071 self.errors.append(error) 1072 1073 def expression( 1074 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1075 ) -> E: 1076 """ 1077 Creates a new, validated Expression. 1078 1079 Args: 1080 exp_class: The expression class to instantiate. 1081 comments: An optional list of comments to attach to the expression. 1082 kwargs: The arguments to set for the expression along with their respective values. 1083 1084 Returns: 1085 The target expression. 1086 """ 1087 instance = exp_class(**kwargs) 1088 instance.add_comments(comments) if comments else self._add_comments(instance) 1089 return self.validate_expression(instance) 1090 1091 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1092 if expression and self._prev_comments: 1093 expression.add_comments(self._prev_comments) 1094 self._prev_comments = None 1095 1096 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1097 """ 1098 Validates an Expression, making sure that all its mandatory arguments are set. 1099 1100 Args: 1101 expression: The expression to validate. 1102 args: An optional list of items that was used to instantiate the expression, if it's a Func. 
1103 1104 Returns: 1105 The validated expression. 1106 """ 1107 if self.error_level != ErrorLevel.IGNORE: 1108 for error_message in expression.error_messages(args): 1109 self.raise_error(error_message) 1110 1111 return expression 1112 1113 def _find_sql(self, start: Token, end: Token) -> str: 1114 return self.sql[start.start : end.end + 1] 1115 1116 def _advance(self, times: int = 1) -> None: 1117 self._index += times 1118 self._curr = seq_get(self._tokens, self._index) 1119 self._next = seq_get(self._tokens, self._index + 1) 1120 1121 if self._index > 0: 1122 self._prev = self._tokens[self._index - 1] 1123 self._prev_comments = self._prev.comments 1124 else: 1125 self._prev = None 1126 self._prev_comments = None 1127 1128 def _retreat(self, index: int) -> None: 1129 if index != self._index: 1130 self._advance(index - self._index) 1131 1132 def _parse_command(self) -> exp.Command: 1133 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1134 1135 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1136 start = self._prev 1137 exists = self._parse_exists() if allow_exists else None 1138 1139 self._match(TokenType.ON) 1140 1141 kind = self._match_set(self.CREATABLES) and self._prev 1142 if not kind: 1143 return self._parse_as_command(start) 1144 1145 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1146 this = self._parse_user_defined_function(kind=kind.token_type) 1147 elif kind.token_type == TokenType.TABLE: 1148 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1149 elif kind.token_type == TokenType.COLUMN: 1150 this = self._parse_column() 1151 else: 1152 this = self._parse_id_var() 1153 1154 self._match(TokenType.IS) 1155 1156 return self.expression( 1157 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1158 ) 1159 1160 def _parse_to_table( 1161 self, 1162 ) -> exp.ToTableProperty: 1163 table = self._parse_table_parts(schema=True) 
1164 return self.expression(exp.ToTableProperty, this=table) 1165 1166 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1167 def _parse_ttl(self) -> exp.Expression: 1168 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1169 this = self._parse_bitwise() 1170 1171 if self._match_text_seq("DELETE"): 1172 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1173 if self._match_text_seq("RECOMPRESS"): 1174 return self.expression( 1175 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1176 ) 1177 if self._match_text_seq("TO", "DISK"): 1178 return self.expression( 1179 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1180 ) 1181 if self._match_text_seq("TO", "VOLUME"): 1182 return self.expression( 1183 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1184 ) 1185 1186 return this 1187 1188 expressions = self._parse_csv(_parse_ttl_action) 1189 where = self._parse_where() 1190 group = self._parse_group() 1191 1192 aggregates = None 1193 if group and self._match(TokenType.SET): 1194 aggregates = self._parse_csv(self._parse_set_item) 1195 1196 return self.expression( 1197 exp.MergeTreeTTL, 1198 expressions=expressions, 1199 where=where, 1200 group=group, 1201 aggregates=aggregates, 1202 ) 1203 1204 def _parse_statement(self) -> t.Optional[exp.Expression]: 1205 if self._curr is None: 1206 return None 1207 1208 if self._match_set(self.STATEMENT_PARSERS): 1209 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1210 1211 if self._match_set(Tokenizer.COMMANDS): 1212 return self._parse_command() 1213 1214 expression = self._parse_expression() 1215 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1216 return self._parse_query_modifiers(expression) 1217 1218 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1219 start = self._prev 1220 temporary = self._match(TokenType.TEMPORARY) 1221 
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement; falls back to a generic Command when it can't."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the statement's various locations.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after":
self._match_text_seq("AFTER"), 1402 "minimum": self._match_texts(("MIN", "MINIMUM")), 1403 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1404 } 1405 1406 if self._match_texts(self.PROPERTY_PARSERS): 1407 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1408 try: 1409 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1410 except TypeError: 1411 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1412 1413 return None 1414 1415 def _parse_property(self) -> t.Optional[exp.Expression]: 1416 if self._match_texts(self.PROPERTY_PARSERS): 1417 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1418 1419 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1420 return self._parse_character_set(default=True) 1421 1422 if self._match_text_seq("COMPOUND", "SORTKEY"): 1423 return self._parse_sortkey(compound=True) 1424 1425 if self._match_text_seq("SQL", "SECURITY"): 1426 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1427 1428 index = self._index 1429 key = self._parse_column() 1430 1431 if not self._match(TokenType.EQ): 1432 self._retreat(index) 1433 return None 1434 1435 return self.expression( 1436 exp.Property, 1437 this=key.to_dot() if isinstance(key, exp.Column) else key, 1438 value=self._parse_column() or self._parse_var(any_token=True), 1439 ) 1440 1441 def _parse_stored(self) -> exp.FileFormatProperty: 1442 self._match(TokenType.ALIAS) 1443 1444 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1445 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1446 1447 return self.expression( 1448 exp.FileFormatProperty, 1449 this=self.expression( 1450 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1451 ) 1452 if input_format or output_format 1453 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1454 ) 1455 1456 def _parse_property_assignment(self, 
exp_class: t.Type[E]) -> E: 1457 self._match(TokenType.EQ) 1458 self._match(TokenType.ALIAS) 1459 return self.expression(exp_class, this=self._parse_field()) 1460 1461 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1462 properties = [] 1463 while True: 1464 if before: 1465 prop = self._parse_property_before() 1466 else: 1467 prop = self._parse_property() 1468 1469 if not prop: 1470 break 1471 for p in ensure_list(prop): 1472 properties.append(p) 1473 1474 if properties: 1475 return self.expression(exp.Properties, expressions=properties) 1476 1477 return None 1478 1479 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1480 return self.expression( 1481 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1482 ) 1483 1484 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1485 if self._index >= 2: 1486 pre_volatile_token = self._tokens[self._index - 2] 1487 else: 1488 pre_volatile_token = None 1489 1490 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1491 return exp.VolatileProperty() 1492 1493 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1494 1495 def _parse_with_property( 1496 self, 1497 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1498 if self._match(TokenType.L_PAREN, advance=False): 1499 return self._parse_wrapped_csv(self._parse_property) 1500 1501 if self._match_text_seq("JOURNAL"): 1502 return self._parse_withjournaltable() 1503 1504 if self._match_text_seq("DATA"): 1505 return self._parse_withdata(no=False) 1506 elif self._match_text_seq("NO", "DATA"): 1507 return self._parse_withdata(no=True) 1508 1509 if not self._next: 1510 return None 1511 1512 return self._parse_withisolatedloading() 1513 1514 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1515 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1516 self._match(TokenType.EQ) 1517 
1518 user = self._parse_id_var() 1519 self._match(TokenType.PARAMETER) 1520 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1521 1522 if not user or not host: 1523 return None 1524 1525 return exp.DefinerProperty(this=f"{user}@{host}") 1526 1527 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1528 self._match(TokenType.TABLE) 1529 self._match(TokenType.EQ) 1530 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1531 1532 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1533 return self.expression(exp.LogProperty, no=no) 1534 1535 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1536 return self.expression(exp.JournalProperty, **kwargs) 1537 1538 def _parse_checksum(self) -> exp.ChecksumProperty: 1539 self._match(TokenType.EQ) 1540 1541 on = None 1542 if self._match(TokenType.ON): 1543 on = True 1544 elif self._match_text_seq("OFF"): 1545 on = False 1546 1547 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1548 1549 def _parse_cluster(self) -> exp.Cluster: 1550 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1551 1552 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1553 self._match_text_seq("BY") 1554 1555 self._match_l_paren() 1556 expressions = self._parse_csv(self._parse_column) 1557 self._match_r_paren() 1558 1559 if self._match_text_seq("SORTED", "BY"): 1560 self._match_l_paren() 1561 sorted_by = self._parse_csv(self._parse_ordered) 1562 self._match_r_paren() 1563 else: 1564 sorted_by = None 1565 1566 self._match(TokenType.INTO) 1567 buckets = self._parse_number() 1568 self._match_text_seq("BUCKETS") 1569 1570 return self.expression( 1571 exp.ClusteredByProperty, 1572 expressions=expressions, 1573 sorted_by=sorted_by, 1574 buckets=buckets, 1575 ) 1576 1577 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1578 if not self._match_text_seq("GRANTS"): 
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING clause: target kind/name, FOR or IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named targets carry an identifier; ROW locking does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # Tri-state statistics flag: True / False / unspecified.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING/EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: a scalar type, TABLE schema, or TABLE<...> struct form."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parses an INSERT statement, including the INSERT [OVERWRITE] [LOCAL] DIRECTORY form."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses an ON CONFLICT or ON DUPLICATE KEY clause of an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses ROW FORMAT SERDE '...' or ROW FORMAT DELIMITED ... clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses LOAD DATA [LOCAL] INPATH ...; otherwise falls back to a generic Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses CACHE [LAZY] TABLE <name> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parses a single VALUES row - either a parenthesized tuple or one bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        # Thin hook so dialects can override how SELECT projections are parsed.
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTE-prefixed statement, SELECT, parenthesized
        subquery/pivot, VALUES, or a bare leading FROM.

        Args:
            nested: allow a parenthesized nested select at this position.
            table: allow a bare table reference inside parentheses (e.g. for EXISTS).
            parse_subquery_alias: whether a trailing alias may follow a subquery.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                # Under a lenient error level we still return something sensible.
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projections.
            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # A standalone leading FROM becomes SELECT * FROM ...
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] cte [, cte ...]; returns None if WITH isn't next."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a stray repeated WITH is tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS (<statement>). The alias is mandatory."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] alias [(col, ...)]; returns None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        # Speculatively parse a column list; back out if the parens held nothing usable.
        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, attaching any trailing pivots/alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, ...) to `this`.

        Non-modifiable nodes are returned unchanged.
        """
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT x, y style: hoist the embedded offset into
                            # a proper Offset node on the query.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block (/*+ ... */)."""
        if self._match(TokenType.HINT):
            hints = []
2241 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2242 hints.extend(hint) 2243 2244 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2245 self.raise_error("Expected */ after HINT") 2246 2247 return self.expression(exp.Hint, expressions=hints) 2248 2249 return None 2250 2251 def _parse_into(self) -> t.Optional[exp.Into]: 2252 if not self._match(TokenType.INTO): 2253 return None 2254 2255 temp = self._match(TokenType.TEMPORARY) 2256 unlogged = self._match_text_seq("UNLOGGED") 2257 self._match(TokenType.TABLE) 2258 2259 return self.expression( 2260 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2261 ) 2262 2263 def _parse_from( 2264 self, joins: bool = False, skip_from_token: bool = False 2265 ) -> t.Optional[exp.From]: 2266 if not skip_from_token and not self._match(TokenType.FROM): 2267 return None 2268 2269 return self.expression( 2270 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2271 ) 2272 2273 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2274 if not self._match(TokenType.MATCH_RECOGNIZE): 2275 return None 2276 2277 self._match_l_paren() 2278 2279 partition = self._parse_partition_by() 2280 order = self._parse_order() 2281 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2282 2283 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2284 rows = exp.var("ONE ROW PER MATCH") 2285 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2286 text = "ALL ROWS PER MATCH" 2287 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2288 text += f" SHOW EMPTY MATCHES" 2289 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2290 text += f" OMIT EMPTY MATCHES" 2291 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2292 text += f" WITH UNMATCHED ROWS" 2293 rows = exp.var(text) 2294 else: 2295 rows = None 2296 2297 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2298 text = "AFTER MATCH SKIP" 2299 if 
self._match_text_seq("PAST", "LAST", "ROW"): 2300 text += f" PAST LAST ROW" 2301 elif self._match_text_seq("TO", "NEXT", "ROW"): 2302 text += f" TO NEXT ROW" 2303 elif self._match_text_seq("TO", "FIRST"): 2304 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2305 elif self._match_text_seq("TO", "LAST"): 2306 text += f" TO LAST {self._advance_any().text}" # type: ignore 2307 after = exp.var(text) 2308 else: 2309 after = None 2310 2311 if self._match_text_seq("PATTERN"): 2312 self._match_l_paren() 2313 2314 if not self._curr: 2315 self.raise_error("Expecting )", self._curr) 2316 2317 paren = 1 2318 start = self._curr 2319 2320 while self._curr and paren > 0: 2321 if self._curr.token_type == TokenType.L_PAREN: 2322 paren += 1 2323 if self._curr.token_type == TokenType.R_PAREN: 2324 paren -= 1 2325 2326 end = self._prev 2327 self._advance() 2328 2329 if paren > 0: 2330 self.raise_error("Expecting )", self._curr) 2331 2332 pattern = exp.var(self._find_sql(start, end)) 2333 else: 2334 pattern = None 2335 2336 define = ( 2337 self._parse_csv( 2338 lambda: self.expression( 2339 exp.Alias, 2340 alias=self._parse_id_var(any_token=True), 2341 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2342 ) 2343 ) 2344 if self._match_text_seq("DEFINE") 2345 else None 2346 ) 2347 2348 self._match_r_paren() 2349 2350 return self.expression( 2351 exp.MatchRecognize, 2352 partition_by=partition, 2353 order=order, 2354 measures=measures, 2355 rows=rows, 2356 after=after, 2357 pattern=pattern, 2358 define=define, 2359 alias=self._parse_table_alias(), 2360 ) 2361 2362 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2363 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2364 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2365 2366 if outer_apply or cross_apply: 2367 this = self._parse_select(table=True) 2368 view = None 2369 outer = not cross_apply 2370 elif self._match(TokenType.LATERAL): 2371 this = 
self._parse_select(table=True) 2372 view = self._match(TokenType.VIEW) 2373 outer = self._match(TokenType.OUTER) 2374 else: 2375 return None 2376 2377 if not this: 2378 this = ( 2379 self._parse_unnest() 2380 or self._parse_function() 2381 or self._parse_id_var(any_token=False) 2382 ) 2383 2384 while self._match(TokenType.DOT): 2385 this = exp.Dot( 2386 this=this, 2387 expression=self._parse_function() or self._parse_id_var(any_token=False), 2388 ) 2389 2390 if view: 2391 table = self._parse_id_var(any_token=False) 2392 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2393 table_alias: t.Optional[exp.TableAlias] = self.expression( 2394 exp.TableAlias, this=table, columns=columns 2395 ) 2396 elif isinstance(this, exp.Subquery) and this.alias: 2397 # Ensures parity between the Subquery's and the Lateral's "alias" args 2398 table_alias = this.args["alias"].copy() 2399 else: 2400 table_alias = self._parse_table_alias() 2401 2402 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2403 2404 def _parse_join_parts( 2405 self, 2406 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2407 return ( 2408 self._match_set(self.JOIN_METHODS) and self._prev, 2409 self._match_set(self.JOIN_SIDES) and self._prev, 2410 self._match_set(self.JOIN_KINDS) and self._prev, 2411 ) 2412 2413 def _parse_join( 2414 self, skip_join_token: bool = False, parse_bracket: bool = False 2415 ) -> t.Optional[exp.Join]: 2416 if self._match(TokenType.COMMA): 2417 return self.expression(exp.Join, this=self._parse_table()) 2418 2419 index = self._index 2420 method, side, kind = self._parse_join_parts() 2421 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2422 join = self._match(TokenType.JOIN) 2423 2424 if not skip_join_token and not join: 2425 self._retreat(index) 2426 kind = None 2427 method = None 2428 side = None 2429 2430 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, 
False) 2431 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2432 2433 if not skip_join_token and not join and not outer_apply and not cross_apply: 2434 return None 2435 2436 if outer_apply: 2437 side = Token(TokenType.LEFT, "LEFT") 2438 2439 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2440 2441 if method: 2442 kwargs["method"] = method.text 2443 if side: 2444 kwargs["side"] = side.text 2445 if kind: 2446 kwargs["kind"] = kind.text 2447 if hint: 2448 kwargs["hint"] = hint 2449 2450 if self._match(TokenType.ON): 2451 kwargs["on"] = self._parse_conjunction() 2452 elif self._match(TokenType.USING): 2453 kwargs["using"] = self._parse_wrapped_id_vars() 2454 elif not (kind and kind.token_type == TokenType.CROSS): 2455 index = self._index 2456 joins = self._parse_joins() 2457 2458 if joins and self._match(TokenType.ON): 2459 kwargs["on"] = self._parse_conjunction() 2460 elif joins and self._match(TokenType.USING): 2461 kwargs["using"] = self._parse_wrapped_id_vars() 2462 else: 2463 joins = None 2464 self._retreat(index) 2465 2466 kwargs["this"].set("joins", joins) 2467 2468 comments = [c for token in (method, side, kind) if token for c in token.comments] 2469 return self.expression(exp.Join, comments=comments, **kwargs) 2470 2471 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2472 this = self._parse_conjunction() 2473 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2474 return this 2475 2476 opclass = self._parse_var(any_token=True) 2477 if opclass: 2478 return self.expression(exp.Opclass, this=this, expression=opclass) 2479 2480 return this 2481 2482 def _parse_index( 2483 self, 2484 index: t.Optional[exp.Expression] = None, 2485 ) -> t.Optional[exp.Index]: 2486 if index: 2487 unique = None 2488 primary = None 2489 amp = None 2490 2491 self._match(TokenType.ON) 2492 self._match(TokenType.TABLE) # hive 2493 table = self._parse_table_parts(schema=True) 2494 else: 2495 unique = 
self._match(TokenType.UNIQUE) 2496 primary = self._match_text_seq("PRIMARY") 2497 amp = self._match_text_seq("AMP") 2498 2499 if not self._match(TokenType.INDEX): 2500 return None 2501 2502 index = self._parse_id_var() 2503 table = None 2504 2505 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2506 2507 if self._match(TokenType.L_PAREN, advance=False): 2508 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2509 else: 2510 columns = None 2511 2512 return self.expression( 2513 exp.Index, 2514 this=index, 2515 table=table, 2516 using=using, 2517 columns=columns, 2518 unique=unique, 2519 primary=primary, 2520 amp=amp, 2521 partition_by=self._parse_partition_by(), 2522 where=self._parse_where(), 2523 ) 2524 2525 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2526 hints: t.List[exp.Expression] = [] 2527 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2528 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2529 hints.append( 2530 self.expression( 2531 exp.WithTableHint, 2532 expressions=self._parse_csv( 2533 lambda: self._parse_function() or self._parse_var(any_token=True) 2534 ), 2535 ) 2536 ) 2537 self._match_r_paren() 2538 else: 2539 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2540 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2541 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2542 2543 self._match_texts({"INDEX", "KEY"}) 2544 if self._match(TokenType.FOR): 2545 hint.set("target", self._advance_any() and self._prev.text.upper()) 2546 2547 hint.set("expressions", self._parse_wrapped_id_vars()) 2548 hints.append(hint) 2549 2550 return hints or None 2551 2552 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2553 return ( 2554 (not schema and self._parse_function(optional_parens=False)) 2555 or self._parse_id_var(any_token=False) 2556 or 
self._parse_string_as_identifier() 2557 or self._parse_placeholder() 2558 ) 2559 2560 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2561 catalog = None 2562 db = None 2563 table = self._parse_table_part(schema=schema) 2564 2565 while self._match(TokenType.DOT): 2566 if catalog: 2567 # This allows nesting the table in arbitrarily many dot expressions if needed 2568 table = self.expression( 2569 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2570 ) 2571 else: 2572 catalog = db 2573 db = table 2574 table = self._parse_table_part(schema=schema) 2575 2576 if not table: 2577 self.raise_error(f"Expected table name but got {self._curr}") 2578 2579 return self.expression( 2580 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2581 ) 2582 2583 def _parse_table( 2584 self, 2585 schema: bool = False, 2586 joins: bool = False, 2587 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2588 parse_bracket: bool = False, 2589 ) -> t.Optional[exp.Expression]: 2590 lateral = self._parse_lateral() 2591 if lateral: 2592 return lateral 2593 2594 unnest = self._parse_unnest() 2595 if unnest: 2596 return unnest 2597 2598 values = self._parse_derived_table_values() 2599 if values: 2600 return values 2601 2602 subquery = self._parse_select(table=True) 2603 if subquery: 2604 if not subquery.args.get("pivots"): 2605 subquery.set("pivots", self._parse_pivots()) 2606 return subquery 2607 2608 bracket = parse_bracket and self._parse_bracket(None) 2609 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2610 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2611 2612 if schema: 2613 return self._parse_schema(this=this) 2614 2615 version = self._parse_version() 2616 2617 if version: 2618 this.set("version", version) 2619 2620 if self.ALIAS_POST_TABLESAMPLE: 2621 table_sample = self._parse_table_sample() 2622 2623 alias = self._parse_table_alias(alias_tokens=alias_tokens or 
self.TABLE_ALIAS_TOKENS) 2624 if alias: 2625 this.set("alias", alias) 2626 2627 this.set("hints", self._parse_table_hints()) 2628 2629 if not this.args.get("pivots"): 2630 this.set("pivots", self._parse_pivots()) 2631 2632 if not self.ALIAS_POST_TABLESAMPLE: 2633 table_sample = self._parse_table_sample() 2634 2635 if table_sample: 2636 table_sample.set("this", this) 2637 this = table_sample 2638 2639 if joins: 2640 for join in iter(self._parse_join, None): 2641 this.append("joins", join) 2642 2643 return this 2644 2645 def _parse_version(self) -> t.Optional[exp.Version]: 2646 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2647 this = "TIMESTAMP" 2648 elif self._match(TokenType.VERSION_SNAPSHOT): 2649 this = "VERSION" 2650 else: 2651 return None 2652 2653 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2654 kind = self._prev.text.upper() 2655 start = self._parse_bitwise() 2656 self._match_texts(("TO", "AND")) 2657 end = self._parse_bitwise() 2658 expression: t.Optional[exp.Expression] = self.expression( 2659 exp.Tuple, expressions=[start, end] 2660 ) 2661 elif self._match_text_seq("CONTAINED", "IN"): 2662 kind = "CONTAINED IN" 2663 expression = self.expression( 2664 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2665 ) 2666 elif self._match(TokenType.ALL): 2667 kind = "ALL" 2668 expression = None 2669 else: 2670 self._match_text_seq("AS", "OF") 2671 kind = "AS OF" 2672 expression = self._parse_type() 2673 2674 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2675 2676 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2677 if not self._match(TokenType.UNNEST): 2678 return None 2679 2680 expressions = self._parse_wrapped_csv(self._parse_type) 2681 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2682 2683 alias = self._parse_table_alias() if with_alias else None 2684 2685 if alias: 2686 if self.UNNEST_COLUMN_ONLY: 2687 if alias.args.get("columns"): 2688 
self.raise_error("Unexpected extra column alias in unnest.") 2689 2690 alias.set("columns", [alias.this]) 2691 alias.set("this", None) 2692 2693 columns = alias.args.get("columns") or [] 2694 if offset and len(expressions) < len(columns): 2695 offset = columns.pop() 2696 2697 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2698 self._match(TokenType.ALIAS) 2699 offset = self._parse_id_var() or exp.to_identifier("offset") 2700 2701 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2702 2703 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2704 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2705 if not is_derived and not self._match(TokenType.VALUES): 2706 return None 2707 2708 expressions = self._parse_csv(self._parse_value) 2709 alias = self._parse_table_alias() 2710 2711 if is_derived: 2712 self._match_r_paren() 2713 2714 return self.expression( 2715 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2716 ) 2717 2718 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2719 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2720 as_modifier and self._match_text_seq("USING", "SAMPLE") 2721 ): 2722 return None 2723 2724 bucket_numerator = None 2725 bucket_denominator = None 2726 bucket_field = None 2727 percent = None 2728 rows = None 2729 size = None 2730 seed = None 2731 2732 kind = ( 2733 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2734 ) 2735 method = self._parse_var(tokens=(TokenType.ROW,)) 2736 2737 self._match(TokenType.L_PAREN) 2738 2739 if self.TABLESAMPLE_CSV: 2740 num = None 2741 expressions = self._parse_csv(self._parse_primary) 2742 else: 2743 expressions = None 2744 num = self._parse_primary() 2745 2746 if self._match_text_seq("BUCKET"): 2747 bucket_numerator = self._parse_number() 2748 self._match_text_seq("OUT", "OF") 2749 bucket_denominator = 
bucket_denominator = self._parse_number() 2750 self._match(TokenType.ON) 2751 bucket_field = self._parse_field() 2752 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2753 percent = num 2754 elif self._match(TokenType.ROWS): 2755 rows = num 2756 elif num: 2757 size = num 2758 2759 self._match(TokenType.R_PAREN) 2760 2761 if self._match(TokenType.L_PAREN): 2762 method = self._parse_var() 2763 seed = self._match(TokenType.COMMA) and self._parse_number() 2764 self._match_r_paren() 2765 elif self._match_texts(("SEED", "REPEATABLE")): 2766 seed = self._parse_wrapped(self._parse_number) 2767 2768 return self.expression( 2769 exp.TableSample, 2770 expressions=expressions, 2771 method=method, 2772 bucket_numerator=bucket_numerator, 2773 bucket_denominator=bucket_denominator, 2774 bucket_field=bucket_field, 2775 percent=percent, 2776 rows=rows, 2777 size=size, 2778 seed=seed, 2779 kind=kind, 2780 ) 2781 2782 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2783 return list(iter(self._parse_pivot, None)) or None 2784 2785 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2786 return list(iter(self._parse_join, None)) or None 2787 2788 # https://duckdb.org/docs/sql/statements/pivot 2789 def _parse_simplified_pivot(self) -> exp.Pivot: 2790 def _parse_on() -> t.Optional[exp.Expression]: 2791 this = self._parse_bitwise() 2792 return self._parse_in(this) if self._match(TokenType.IN) else this 2793 2794 this = self._parse_table() 2795 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2796 using = self._match(TokenType.USING) and self._parse_csv( 2797 lambda: self._parse_alias(self._parse_function()) 2798 ) 2799 group = self._parse_group() 2800 return self.expression( 2801 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2802 ) 2803 2804 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2805 index = self._index 2806 include_nulls = None 2807 2808 if self._match(TokenType.PIVOT): 2809 unpivot = False 2810 elif 
self._match(TokenType.UNPIVOT): 2811 unpivot = True 2812 2813 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2814 if self._match_text_seq("INCLUDE", "NULLS"): 2815 include_nulls = True 2816 elif self._match_text_seq("EXCLUDE", "NULLS"): 2817 include_nulls = False 2818 else: 2819 return None 2820 2821 expressions = [] 2822 field = None 2823 2824 if not self._match(TokenType.L_PAREN): 2825 self._retreat(index) 2826 return None 2827 2828 if unpivot: 2829 expressions = self._parse_csv(self._parse_column) 2830 else: 2831 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2832 2833 if not expressions: 2834 self.raise_error("Failed to parse PIVOT's aggregation list") 2835 2836 if not self._match(TokenType.FOR): 2837 self.raise_error("Expecting FOR") 2838 2839 value = self._parse_column() 2840 2841 if not self._match(TokenType.IN): 2842 self.raise_error("Expecting IN") 2843 2844 field = self._parse_in(value, alias=True) 2845 2846 self._match_r_paren() 2847 2848 pivot = self.expression( 2849 exp.Pivot, 2850 expressions=expressions, 2851 field=field, 2852 unpivot=unpivot, 2853 include_nulls=include_nulls, 2854 ) 2855 2856 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2857 pivot.set("alias", self._parse_table_alias()) 2858 2859 if not unpivot: 2860 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2861 2862 columns: t.List[exp.Expression] = [] 2863 for fld in pivot.args["field"].expressions: 2864 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2865 for name in names: 2866 if self.PREFIXED_PIVOT_COLUMNS: 2867 name = f"{name}_{field_name}" if name else field_name 2868 else: 2869 name = f"{field_name}_{name}" if name else field_name 2870 2871 columns.append(exp.to_identifier(name)) 2872 2873 pivot.set("columns", columns) 2874 2875 return pivot 2876 2877 def _pivot_column_names(self, aggregations: 
t.List[exp.Expression]) -> t.List[str]: 2878 return [agg.alias for agg in aggregations] 2879 2880 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2881 if not skip_where_token and not self._match(TokenType.WHERE): 2882 return None 2883 2884 return self.expression( 2885 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2886 ) 2887 2888 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2889 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2890 return None 2891 2892 elements = defaultdict(list) 2893 2894 if self._match(TokenType.ALL): 2895 return self.expression(exp.Group, all=True) 2896 2897 while True: 2898 expressions = self._parse_csv(self._parse_conjunction) 2899 if expressions: 2900 elements["expressions"].extend(expressions) 2901 2902 grouping_sets = self._parse_grouping_sets() 2903 if grouping_sets: 2904 elements["grouping_sets"].extend(grouping_sets) 2905 2906 rollup = None 2907 cube = None 2908 totals = None 2909 2910 with_ = self._match(TokenType.WITH) 2911 if self._match(TokenType.ROLLUP): 2912 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2913 elements["rollup"].extend(ensure_list(rollup)) 2914 2915 if self._match(TokenType.CUBE): 2916 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2917 elements["cube"].extend(ensure_list(cube)) 2918 2919 if self._match_text_seq("TOTALS"): 2920 totals = True 2921 elements["totals"] = True # type: ignore 2922 2923 if not (grouping_sets or rollup or cube or totals): 2924 break 2925 2926 return self.expression(exp.Group, **elements) # type: ignore 2927 2928 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2929 if not self._match(TokenType.GROUPING_SETS): 2930 return None 2931 2932 return self._parse_wrapped_csv(self._parse_grouping_set) 2933 2934 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2935 if self._match(TokenType.L_PAREN): 2936 grouping_set = 
self._parse_csv(self._parse_column) 2937 self._match_r_paren() 2938 return self.expression(exp.Tuple, expressions=grouping_set) 2939 2940 return self._parse_column() 2941 2942 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2943 if not skip_having_token and not self._match(TokenType.HAVING): 2944 return None 2945 return self.expression(exp.Having, this=self._parse_conjunction()) 2946 2947 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2948 if not self._match(TokenType.QUALIFY): 2949 return None 2950 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2951 2952 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2953 if skip_start_token: 2954 start = None 2955 elif self._match(TokenType.START_WITH): 2956 start = self._parse_conjunction() 2957 else: 2958 return None 2959 2960 self._match(TokenType.CONNECT_BY) 2961 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2962 exp.Prior, this=self._parse_bitwise() 2963 ) 2964 connect = self._parse_conjunction() 2965 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2966 2967 if not start and self._match(TokenType.START_WITH): 2968 start = self._parse_conjunction() 2969 2970 return self.expression(exp.Connect, start=start, connect=connect) 2971 2972 def _parse_order( 2973 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2974 ) -> t.Optional[exp.Expression]: 2975 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2976 return this 2977 2978 return self.expression( 2979 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2980 ) 2981 2982 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2983 if not self._match(token): 2984 return None 2985 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2986 2987 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 2988 this = 
parse_method() if parse_method else self._parse_conjunction() 2989 2990 asc = self._match(TokenType.ASC) 2991 desc = self._match(TokenType.DESC) or (asc and False) 2992 2993 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2994 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2995 2996 nulls_first = is_nulls_first or False 2997 explicitly_null_ordered = is_nulls_first or is_nulls_last 2998 2999 if ( 3000 not explicitly_null_ordered 3001 and ( 3002 (not desc and self.NULL_ORDERING == "nulls_are_small") 3003 or (desc and self.NULL_ORDERING != "nulls_are_small") 3004 ) 3005 and self.NULL_ORDERING != "nulls_are_last" 3006 ): 3007 nulls_first = True 3008 3009 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3010 3011 def _parse_limit( 3012 self, this: t.Optional[exp.Expression] = None, top: bool = False 3013 ) -> t.Optional[exp.Expression]: 3014 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3015 comments = self._prev_comments 3016 if top: 3017 limit_paren = self._match(TokenType.L_PAREN) 3018 expression = self._parse_number() 3019 3020 if limit_paren: 3021 self._match_r_paren() 3022 else: 3023 expression = self._parse_term() 3024 3025 if self._match(TokenType.COMMA): 3026 offset = expression 3027 expression = self._parse_term() 3028 else: 3029 offset = None 3030 3031 limit_exp = self.expression( 3032 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3033 ) 3034 3035 return limit_exp 3036 3037 if self._match(TokenType.FETCH): 3038 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3039 direction = self._prev.text if direction else "FIRST" 3040 3041 count = self._parse_field(tokens=self.FETCH_TOKENS) 3042 percent = self._match(TokenType.PERCENT) 3043 3044 self._match_set((TokenType.ROW, TokenType.ROWS)) 3045 3046 only = self._match_text_seq("ONLY") 3047 with_ties = self._match_text_seq("WITH", "TIES") 3048 3049 if only and with_ties: 3050 self.raise_error("Cannot specify 
both ONLY and WITH TIES in FETCH clause") 3051 3052 return self.expression( 3053 exp.Fetch, 3054 direction=direction, 3055 count=count, 3056 percent=percent, 3057 with_ties=with_ties, 3058 ) 3059 3060 return this 3061 3062 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3063 if not self._match(TokenType.OFFSET): 3064 return this 3065 3066 count = self._parse_term() 3067 self._match_set((TokenType.ROW, TokenType.ROWS)) 3068 return self.expression(exp.Offset, this=this, expression=count) 3069 3070 def _parse_locks(self) -> t.List[exp.Lock]: 3071 locks = [] 3072 while True: 3073 if self._match_text_seq("FOR", "UPDATE"): 3074 update = True 3075 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3076 "LOCK", "IN", "SHARE", "MODE" 3077 ): 3078 update = False 3079 else: 3080 break 3081 3082 expressions = None 3083 if self._match_text_seq("OF"): 3084 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3085 3086 wait: t.Optional[bool | exp.Expression] = None 3087 if self._match_text_seq("NOWAIT"): 3088 wait = True 3089 elif self._match_text_seq("WAIT"): 3090 wait = self._parse_primary() 3091 elif self._match_text_seq("SKIP", "LOCKED"): 3092 wait = False 3093 3094 locks.append( 3095 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3096 ) 3097 3098 return locks 3099 3100 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3101 if not self._match_set(self.SET_OPERATIONS): 3102 return this 3103 3104 token_type = self._prev.token_type 3105 3106 if token_type == TokenType.UNION: 3107 expression = exp.Union 3108 elif token_type == TokenType.EXCEPT: 3109 expression = exp.Except 3110 else: 3111 expression = exp.Intersect 3112 3113 return self.expression( 3114 expression, 3115 this=this, 3116 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3117 by_name=self._match_text_seq("BY", "NAME"), 3118 
expression=self._parse_set_operations(self._parse_select(nested=True)), 3119 ) 3120 3121 def _parse_expression(self) -> t.Optional[exp.Expression]: 3122 return self._parse_alias(self._parse_conjunction()) 3123 3124 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3125 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3126 3127 def _parse_equality(self) -> t.Optional[exp.Expression]: 3128 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3129 3130 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3131 return self._parse_tokens(self._parse_range, self.COMPARISON) 3132 3133 def _parse_range(self) -> t.Optional[exp.Expression]: 3134 this = self._parse_bitwise() 3135 negate = self._match(TokenType.NOT) 3136 3137 if self._match_set(self.RANGE_PARSERS): 3138 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3139 if not expression: 3140 return this 3141 3142 this = expression 3143 elif self._match(TokenType.ISNULL): 3144 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3145 3146 # Postgres supports ISNULL and NOTNULL for conditions. 
3147 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3148 if self._match(TokenType.NOTNULL): 3149 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3150 this = self.expression(exp.Not, this=this) 3151 3152 if negate: 3153 this = self.expression(exp.Not, this=this) 3154 3155 if self._match(TokenType.IS): 3156 this = self._parse_is(this) 3157 3158 return this 3159 3160 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3161 index = self._index - 1 3162 negate = self._match(TokenType.NOT) 3163 3164 if self._match_text_seq("DISTINCT", "FROM"): 3165 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3166 return self.expression(klass, this=this, expression=self._parse_expression()) 3167 3168 expression = self._parse_null() or self._parse_boolean() 3169 if not expression: 3170 self._retreat(index) 3171 return None 3172 3173 this = self.expression(exp.Is, this=this, expression=expression) 3174 return self.expression(exp.Not, this=this) if negate else this 3175 3176 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3177 unnest = self._parse_unnest(with_alias=False) 3178 if unnest: 3179 this = self.expression(exp.In, this=this, unnest=unnest) 3180 elif self._match(TokenType.L_PAREN): 3181 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3182 3183 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3184 this = self.expression(exp.In, this=this, query=expressions[0]) 3185 else: 3186 this = self.expression(exp.In, this=this, expressions=expressions) 3187 3188 self._match_r_paren(this) 3189 else: 3190 this = self.expression(exp.In, this=this, field=self._parse_field()) 3191 3192 return this 3193 3194 def _parse_between(self, this: exp.Expression) -> exp.Between: 3195 low = self._parse_bitwise() 3196 self._match(TokenType.AND) 3197 high = self._parse_bitwise() 3198 return self.expression(exp.Between, this=this, low=low, 
high=high) 3199 3200 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3201 if not self._match(TokenType.ESCAPE): 3202 return this 3203 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3204 3205 def _parse_interval(self) -> t.Optional[exp.Interval]: 3206 index = self._index 3207 3208 if not self._match(TokenType.INTERVAL): 3209 return None 3210 3211 if self._match(TokenType.STRING, advance=False): 3212 this = self._parse_primary() 3213 else: 3214 this = self._parse_term() 3215 3216 if not this: 3217 self._retreat(index) 3218 return None 3219 3220 unit = self._parse_function() or self._parse_var(any_token=True) 3221 3222 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3223 # each INTERVAL expression into this canonical form so it's easy to transpile 3224 if this and this.is_number: 3225 this = exp.Literal.string(this.name) 3226 elif this and this.is_string: 3227 parts = this.name.split() 3228 3229 if len(parts) == 2: 3230 if unit: 3231 # This is not actually a unit, it's something else (e.g. 
a "window side") 3232 unit = None 3233 self._retreat(self._index - 1) 3234 3235 this = exp.Literal.string(parts[0]) 3236 unit = self.expression(exp.Var, this=parts[1]) 3237 3238 return self.expression(exp.Interval, this=this, unit=unit) 3239 3240 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3241 this = self._parse_term() 3242 3243 while True: 3244 if self._match_set(self.BITWISE): 3245 this = self.expression( 3246 self.BITWISE[self._prev.token_type], 3247 this=this, 3248 expression=self._parse_term(), 3249 ) 3250 elif self._match(TokenType.DQMARK): 3251 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3252 elif self._match_pair(TokenType.LT, TokenType.LT): 3253 this = self.expression( 3254 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3255 ) 3256 elif self._match_pair(TokenType.GT, TokenType.GT): 3257 this = self.expression( 3258 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3259 ) 3260 else: 3261 break 3262 3263 return this 3264 3265 def _parse_term(self) -> t.Optional[exp.Expression]: 3266 return self._parse_tokens(self._parse_factor, self.TERM) 3267 3268 def _parse_factor(self) -> t.Optional[exp.Expression]: 3269 return self._parse_tokens(self._parse_unary, self.FACTOR) 3270 3271 def _parse_unary(self) -> t.Optional[exp.Expression]: 3272 if self._match_set(self.UNARY_PARSERS): 3273 return self.UNARY_PARSERS[self._prev.token_type](self) 3274 return self._parse_at_time_zone(self._parse_type()) 3275 3276 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3277 interval = parse_interval and self._parse_interval() 3278 if interval: 3279 return interval 3280 3281 index = self._index 3282 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3283 this = self._parse_column() 3284 3285 if data_type: 3286 if isinstance(this, exp.Literal): 3287 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3288 if parser: 3289 return parser(self, this, 
data_type) 3290 return self.expression(exp.Cast, this=this, to=data_type) 3291 if not data_type.expressions: 3292 self._retreat(index) 3293 return self._parse_column() 3294 return self._parse_column_ops(data_type) 3295 3296 return this and self._parse_column_ops(this) 3297 3298 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3299 this = self._parse_type() 3300 if not this: 3301 return None 3302 3303 return self.expression( 3304 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3305 ) 3306 3307 def _parse_types( 3308 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3309 ) -> t.Optional[exp.Expression]: 3310 index = self._index 3311 3312 prefix = self._match_text_seq("SYSUDTLIB", ".") 3313 3314 if not self._match_set(self.TYPE_TOKENS): 3315 identifier = allow_identifiers and self._parse_id_var( 3316 any_token=False, tokens=(TokenType.VAR,) 3317 ) 3318 3319 if identifier: 3320 tokens = self._tokenizer.tokenize(identifier.name) 3321 3322 if len(tokens) != 1: 3323 self.raise_error("Unexpected identifier", self._prev) 3324 3325 if tokens[0].token_type in self.TYPE_TOKENS: 3326 self._prev = tokens[0] 3327 elif self.SUPPORTS_USER_DEFINED_TYPES: 3328 type_name = identifier.name 3329 3330 while self._match(TokenType.DOT): 3331 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3332 3333 return exp.DataType.build(type_name, udt=True) 3334 else: 3335 return None 3336 else: 3337 return None 3338 3339 type_token = self._prev.token_type 3340 3341 if type_token == TokenType.PSEUDO_TYPE: 3342 return self.expression(exp.PseudoType, this=self._prev.text) 3343 3344 if type_token == TokenType.OBJECT_IDENTIFIER: 3345 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3346 3347 nested = type_token in self.NESTED_TYPE_TOKENS 3348 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3349 expressions = None 3350 maybe_func = False 3351 3352 if self._match(TokenType.L_PAREN): 3353 if 
is_struct: 3354 expressions = self._parse_csv(self._parse_struct_types) 3355 elif nested: 3356 expressions = self._parse_csv( 3357 lambda: self._parse_types( 3358 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3359 ) 3360 ) 3361 elif type_token in self.ENUM_TYPE_TOKENS: 3362 expressions = self._parse_csv(self._parse_equality) 3363 else: 3364 expressions = self._parse_csv(self._parse_type_size) 3365 3366 if not expressions or not self._match(TokenType.R_PAREN): 3367 self._retreat(index) 3368 return None 3369 3370 maybe_func = True 3371 3372 this: t.Optional[exp.Expression] = None 3373 values: t.Optional[t.List[exp.Expression]] = None 3374 3375 if nested and self._match(TokenType.LT): 3376 if is_struct: 3377 expressions = self._parse_csv(self._parse_struct_types) 3378 else: 3379 expressions = self._parse_csv( 3380 lambda: self._parse_types( 3381 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3382 ) 3383 ) 3384 3385 if not self._match(TokenType.GT): 3386 self.raise_error("Expecting >") 3387 3388 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3389 values = self._parse_csv(self._parse_conjunction) 3390 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3391 3392 if type_token in self.TIMESTAMPS: 3393 if self._match_text_seq("WITH", "TIME", "ZONE"): 3394 maybe_func = False 3395 tz_type = ( 3396 exp.DataType.Type.TIMETZ 3397 if type_token in self.TIMES 3398 else exp.DataType.Type.TIMESTAMPTZ 3399 ) 3400 this = exp.DataType(this=tz_type, expressions=expressions) 3401 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3402 maybe_func = False 3403 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3404 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3405 maybe_func = False 3406 elif type_token == TokenType.INTERVAL: 3407 unit = self._parse_var() 3408 3409 if self._match_text_seq("TO"): 3410 span = [exp.IntervalSpan(this=unit, 
expression=self._parse_var())] 3411 else: 3412 span = None 3413 3414 if span or not unit: 3415 this = self.expression( 3416 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3417 ) 3418 else: 3419 this = self.expression(exp.Interval, unit=unit) 3420 3421 if maybe_func and check_func: 3422 index2 = self._index 3423 peek = self._parse_string() 3424 3425 if not peek: 3426 self._retreat(index) 3427 return None 3428 3429 self._retreat(index2) 3430 3431 if not this: 3432 if self._match_text_seq("UNSIGNED"): 3433 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3434 if not unsigned_type_token: 3435 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3436 3437 type_token = unsigned_type_token or type_token 3438 3439 this = exp.DataType( 3440 this=exp.DataType.Type[type_token.value], 3441 expressions=expressions, 3442 nested=nested, 3443 values=values, 3444 prefix=prefix, 3445 ) 3446 3447 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3448 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3449 3450 return this 3451 3452 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3453 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3454 self._match(TokenType.COLON) 3455 return self._parse_column_def(this) 3456 3457 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3458 if not self._match_text_seq("AT", "TIME", "ZONE"): 3459 return this 3460 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3461 3462 def _parse_column(self) -> t.Optional[exp.Expression]: 3463 this = self._parse_field() 3464 if isinstance(this, exp.Identifier): 3465 this = self.expression(exp.Column, this=this) 3466 elif not this: 3467 return self._parse_bracket(this) 3468 return self._parse_column_ops(this) 3469 3470 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3471 
this = self._parse_bracket(this) 3472 3473 while self._match_set(self.COLUMN_OPERATORS): 3474 op_token = self._prev.token_type 3475 op = self.COLUMN_OPERATORS.get(op_token) 3476 3477 if op_token == TokenType.DCOLON: 3478 field = self._parse_types() 3479 if not field: 3480 self.raise_error("Expected type") 3481 elif op and self._curr: 3482 self._advance() 3483 value = self._prev.text 3484 field = ( 3485 exp.Literal.number(value) 3486 if self._prev.token_type == TokenType.NUMBER 3487 else exp.Literal.string(value) 3488 ) 3489 else: 3490 field = self._parse_field(anonymous_func=True, any_token=True) 3491 3492 if isinstance(field, exp.Func): 3493 # bigquery allows function calls like x.y.count(...) 3494 # SAFE.SUBSTR(...) 3495 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3496 this = self._replace_columns_with_dots(this) 3497 3498 if op: 3499 this = op(self, this, field) 3500 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3501 this = self.expression( 3502 exp.Column, 3503 this=field, 3504 table=this.this, 3505 db=this.args.get("table"), 3506 catalog=this.args.get("db"), 3507 ) 3508 else: 3509 this = self.expression(exp.Dot, this=this, expression=field) 3510 this = self._parse_bracket(this) 3511 return this 3512 3513 def _parse_primary(self) -> t.Optional[exp.Expression]: 3514 if self._match_set(self.PRIMARY_PARSERS): 3515 token_type = self._prev.token_type 3516 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3517 3518 if token_type == TokenType.STRING: 3519 expressions = [primary] 3520 while self._match(TokenType.STRING): 3521 expressions.append(exp.Literal.string(self._prev.text)) 3522 3523 if len(expressions) > 1: 3524 return self.expression(exp.Concat, expressions=expressions) 3525 3526 return primary 3527 3528 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3529 return exp.Literal.number(f"0.{self._prev.text}") 3530 3531 if self._match(TokenType.L_PAREN): 3532 comments 
= self._prev_comments 3533 query = self._parse_select() 3534 3535 if query: 3536 expressions = [query] 3537 else: 3538 expressions = self._parse_expressions() 3539 3540 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3541 3542 if isinstance(this, exp.Subqueryable): 3543 this = self._parse_set_operations( 3544 self._parse_subquery(this=this, parse_alias=False) 3545 ) 3546 elif len(expressions) > 1: 3547 this = self.expression(exp.Tuple, expressions=expressions) 3548 else: 3549 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3550 3551 if this: 3552 this.add_comments(comments) 3553 3554 self._match_r_paren(expression=this) 3555 return this 3556 3557 return None 3558 3559 def _parse_field( 3560 self, 3561 any_token: bool = False, 3562 tokens: t.Optional[t.Collection[TokenType]] = None, 3563 anonymous_func: bool = False, 3564 ) -> t.Optional[exp.Expression]: 3565 return ( 3566 self._parse_primary() 3567 or self._parse_function(anonymous=anonymous_func) 3568 or self._parse_id_var(any_token=any_token, tokens=tokens) 3569 ) 3570 3571 def _parse_function( 3572 self, 3573 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3574 anonymous: bool = False, 3575 optional_parens: bool = True, 3576 ) -> t.Optional[exp.Expression]: 3577 if not self._curr: 3578 return None 3579 3580 token_type = self._curr.token_type 3581 this = self._curr.text 3582 upper = this.upper() 3583 3584 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3585 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3586 self._advance() 3587 return parser(self) 3588 3589 if not self._next or self._next.token_type != TokenType.L_PAREN: 3590 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3591 self._advance() 3592 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3593 3594 return None 3595 3596 if token_type not in self.FUNC_TOKENS: 3597 return None 3598 3599 self._advance(2) 3600 3601 parser = 
self.FUNCTION_PARSERS.get(upper) 3602 if parser and not anonymous: 3603 this = parser(self) 3604 else: 3605 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3606 3607 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3608 this = self.expression(subquery_predicate, this=self._parse_select()) 3609 self._match_r_paren() 3610 return this 3611 3612 if functions is None: 3613 functions = self.FUNCTIONS 3614 3615 function = functions.get(upper) 3616 3617 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3618 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3619 3620 if function and not anonymous: 3621 func = self.validate_expression(function(args), args) 3622 if not self.NORMALIZE_FUNCTIONS: 3623 func.meta["name"] = this 3624 this = func 3625 else: 3626 this = self.expression(exp.Anonymous, this=this, expressions=args) 3627 3628 self._match_r_paren(this) 3629 return self._parse_window(this) 3630 3631 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3632 return self._parse_column_def(self._parse_id_var()) 3633 3634 def _parse_user_defined_function( 3635 self, kind: t.Optional[TokenType] = None 3636 ) -> t.Optional[exp.Expression]: 3637 this = self._parse_id_var() 3638 3639 while self._match(TokenType.DOT): 3640 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3641 3642 if not self._match(TokenType.L_PAREN): 3643 return this 3644 3645 expressions = self._parse_csv(self._parse_function_parameter) 3646 self._match_r_paren() 3647 return self.expression( 3648 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3649 ) 3650 3651 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3652 literal = self._parse_primary() 3653 if literal: 3654 return self.expression(exp.Introducer, this=token.text, expression=literal) 3655 3656 return self.expression(exp.Identifier, this=token.text) 3657 3658 def _parse_session_parameter(self) -> 
exp.SessionParameter: 3659 kind = None 3660 this = self._parse_id_var() or self._parse_primary() 3661 3662 if this and self._match(TokenType.DOT): 3663 kind = this.name 3664 this = self._parse_var() or self._parse_primary() 3665 3666 return self.expression(exp.SessionParameter, this=this, kind=kind) 3667 3668 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3669 index = self._index 3670 3671 if self._match(TokenType.L_PAREN): 3672 expressions = t.cast( 3673 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3674 ) 3675 3676 if not self._match(TokenType.R_PAREN): 3677 self._retreat(index) 3678 else: 3679 expressions = [self._parse_id_var()] 3680 3681 if self._match_set(self.LAMBDAS): 3682 return self.LAMBDAS[self._prev.token_type](self, expressions) 3683 3684 self._retreat(index) 3685 3686 this: t.Optional[exp.Expression] 3687 3688 if self._match(TokenType.DISTINCT): 3689 this = self.expression( 3690 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3691 ) 3692 else: 3693 this = self._parse_select_or_expression(alias=alias) 3694 3695 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3696 3697 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3698 index = self._index 3699 3700 if not self.errors: 3701 try: 3702 if self._parse_select(nested=True): 3703 return this 3704 except ParseError: 3705 pass 3706 finally: 3707 self.errors.clear() 3708 self._retreat(index) 3709 3710 if not self._match(TokenType.L_PAREN): 3711 return this 3712 3713 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3714 3715 self._match_r_paren() 3716 return self.expression(exp.Schema, this=this, expressions=args) 3717 3718 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3719 return self._parse_column_def(self._parse_field(any_token=True)) 3720 3721 def _parse_column_def(self, this: t.Optional[exp.Expression]) 
-> t.Optional[exp.Expression]: 3722 # column defs are not really columns, they're identifiers 3723 if isinstance(this, exp.Column): 3724 this = this.this 3725 3726 kind = self._parse_types(schema=True) 3727 3728 if self._match_text_seq("FOR", "ORDINALITY"): 3729 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3730 3731 constraints: t.List[exp.Expression] = [] 3732 3733 if not kind and self._match(TokenType.ALIAS): 3734 constraints.append( 3735 self.expression( 3736 exp.ComputedColumnConstraint, 3737 this=self._parse_conjunction(), 3738 persisted=self._match_text_seq("PERSISTED"), 3739 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3740 ) 3741 ) 3742 3743 while True: 3744 constraint = self._parse_column_constraint() 3745 if not constraint: 3746 break 3747 constraints.append(constraint) 3748 3749 if not kind and not constraints: 3750 return this 3751 3752 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3753 3754 def _parse_auto_increment( 3755 self, 3756 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3757 start = None 3758 increment = None 3759 3760 if self._match(TokenType.L_PAREN, advance=False): 3761 args = self._parse_wrapped_csv(self._parse_bitwise) 3762 start = seq_get(args, 0) 3763 increment = seq_get(args, 1) 3764 elif self._match_text_seq("START"): 3765 start = self._parse_bitwise() 3766 self._match_text_seq("INCREMENT") 3767 increment = self._parse_bitwise() 3768 3769 if start and increment: 3770 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3771 3772 return exp.AutoIncrementColumnConstraint() 3773 3774 def _parse_compress(self) -> exp.CompressColumnConstraint: 3775 if self._match(TokenType.L_PAREN, advance=False): 3776 return self.expression( 3777 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3778 ) 3779 3780 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 
3781 3782 def _parse_generated_as_identity( 3783 self, 3784 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3785 if self._match_text_seq("BY", "DEFAULT"): 3786 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3787 this = self.expression( 3788 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3789 ) 3790 else: 3791 self._match_text_seq("ALWAYS") 3792 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3793 3794 self._match(TokenType.ALIAS) 3795 identity = self._match_text_seq("IDENTITY") 3796 3797 if self._match(TokenType.L_PAREN): 3798 if self._match(TokenType.START_WITH): 3799 this.set("start", self._parse_bitwise()) 3800 if self._match_text_seq("INCREMENT", "BY"): 3801 this.set("increment", self._parse_bitwise()) 3802 if self._match_text_seq("MINVALUE"): 3803 this.set("minvalue", self._parse_bitwise()) 3804 if self._match_text_seq("MAXVALUE"): 3805 this.set("maxvalue", self._parse_bitwise()) 3806 3807 if self._match_text_seq("CYCLE"): 3808 this.set("cycle", True) 3809 elif self._match_text_seq("NO", "CYCLE"): 3810 this.set("cycle", False) 3811 3812 if not identity: 3813 this.set("expression", self._parse_bitwise()) 3814 3815 self._match_r_paren() 3816 3817 return this 3818 3819 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3820 self._match_text_seq("LENGTH") 3821 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3822 3823 def _parse_not_constraint( 3824 self, 3825 ) -> t.Optional[exp.Expression]: 3826 if self._match_text_seq("NULL"): 3827 return self.expression(exp.NotNullColumnConstraint) 3828 if self._match_text_seq("CASESPECIFIC"): 3829 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3830 if self._match_text_seq("FOR", "REPLICATION"): 3831 return self.expression(exp.NotForReplicationColumnConstraint) 3832 return None 3833 3834 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3835 if 
self._match(TokenType.CONSTRAINT): 3836 this = self._parse_id_var() 3837 else: 3838 this = None 3839 3840 if self._match_texts(self.CONSTRAINT_PARSERS): 3841 return self.expression( 3842 exp.ColumnConstraint, 3843 this=this, 3844 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3845 ) 3846 3847 return this 3848 3849 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3850 if not self._match(TokenType.CONSTRAINT): 3851 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3852 3853 this = self._parse_id_var() 3854 expressions = [] 3855 3856 while True: 3857 constraint = self._parse_unnamed_constraint() or self._parse_function() 3858 if not constraint: 3859 break 3860 expressions.append(constraint) 3861 3862 return self.expression(exp.Constraint, this=this, expressions=expressions) 3863 3864 def _parse_unnamed_constraint( 3865 self, constraints: t.Optional[t.Collection[str]] = None 3866 ) -> t.Optional[exp.Expression]: 3867 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3868 return None 3869 3870 constraint = self._prev.text.upper() 3871 if constraint not in self.CONSTRAINT_PARSERS: 3872 self.raise_error(f"No parser found for schema constraint {constraint}.") 3873 3874 return self.CONSTRAINT_PARSERS[constraint](self) 3875 3876 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3877 self._match_text_seq("KEY") 3878 return self.expression( 3879 exp.UniqueColumnConstraint, 3880 this=self._parse_schema(self._parse_id_var(any_token=False)), 3881 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3882 ) 3883 3884 def _parse_key_constraint_options(self) -> t.List[str]: 3885 options = [] 3886 while True: 3887 if not self._curr: 3888 break 3889 3890 if self._match(TokenType.ON): 3891 action = None 3892 on = self._advance_any() and self._prev.text 3893 3894 if self._match_text_seq("NO", "ACTION"): 3895 action = "NO ACTION" 3896 elif self._match_text_seq("CASCADE"): 3897 
action = "CASCADE" 3898 elif self._match_text_seq("RESTRICT"): 3899 action = "RESTRICT" 3900 elif self._match_pair(TokenType.SET, TokenType.NULL): 3901 action = "SET NULL" 3902 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3903 action = "SET DEFAULT" 3904 else: 3905 self.raise_error("Invalid key constraint") 3906 3907 options.append(f"ON {on} {action}") 3908 elif self._match_text_seq("NOT", "ENFORCED"): 3909 options.append("NOT ENFORCED") 3910 elif self._match_text_seq("DEFERRABLE"): 3911 options.append("DEFERRABLE") 3912 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3913 options.append("INITIALLY DEFERRED") 3914 elif self._match_text_seq("NORELY"): 3915 options.append("NORELY") 3916 elif self._match_text_seq("MATCH", "FULL"): 3917 options.append("MATCH FULL") 3918 else: 3919 break 3920 3921 return options 3922 3923 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3924 if match and not self._match(TokenType.REFERENCES): 3925 return None 3926 3927 expressions = None 3928 this = self._parse_table(schema=True) 3929 options = self._parse_key_constraint_options() 3930 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3931 3932 def _parse_foreign_key(self) -> exp.ForeignKey: 3933 expressions = self._parse_wrapped_id_vars() 3934 reference = self._parse_references() 3935 options = {} 3936 3937 while self._match(TokenType.ON): 3938 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3939 self.raise_error("Expected DELETE or UPDATE") 3940 3941 kind = self._prev.text.lower() 3942 3943 if self._match_text_seq("NO", "ACTION"): 3944 action = "NO ACTION" 3945 elif self._match(TokenType.SET): 3946 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3947 action = "SET " + self._prev.text.upper() 3948 else: 3949 self._advance() 3950 action = self._prev.text.upper() 3951 3952 options[kind] = action 3953 3954 return self.expression( 3955 exp.ForeignKey, expressions=expressions, 
            reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse a single column/field inside a PRIMARY KEY (...) list."""
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse a PRIMARY KEY constraint.

        Returns a column-level PrimaryKeyColumnConstraint when no parenthesized
        column list follows (and we're not parsing table properties), otherwise
        a table-level PrimaryKey with its column list and options.
        """
        # Optional ASC/DESC; only DESC is recorded (ASC is the default).
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing [...] or {...}: subscript/slice, ARRAY literal, or struct."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon, e.g. x[:3] -- a slice with no start value.
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to consume chained brackets, e.g. x[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice when a colon follows, e.g. x[1:2]."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # A CASE expression may be followed by an OVER clause.
        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all -- rewind and bail out.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            # "VALUE FOR" did not follow -- put the previous token back and give up.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) or the comma-separated variant."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT ...]).

        Args:
            strict: produce exp.Cast when True, exp.TryCast otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Comma instead of AS: CAST(expr, 'type string') form.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name -- treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(... FORMAT ...) to a temporal type becomes STR_TO_DATE/STR_TO_TIME,
                # with the format string translated via the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT(...) arguments, normalizing NULL handling per dialect."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(separator, ...) arguments."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Only the values are coerced; the separator is left as-is.
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, including WITHIN GROUP."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of (search, result) arguments leaves a trailing default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse [KEY] <key> {:|,} [VALUE] <value> inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when FORMAT JSON follows it."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e.
        # NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) arguments and its trailing clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH UNIQUE KEYS / WITHOUT UNIQUE KEYS; None when neither appears.
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...) arguments, honoring the dialect's order and defaults."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (cols) AGAINST (expr [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(expr [, path]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column spec in the WITH clause: name, type, [path], [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments into a StrPosition node.

        Args:
            haystack_first: argument order in the comma form is (haystack, needle).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint call, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([position] [chars FROM] expr [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM expr): the first parse produced the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause: WINDOW <name> AS (...) [, ...]."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named-window definition: <name> AS (...)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing FILTER / WITHIN GROUP / OVER window syntax around `this`.

        Args:
            alias: when True, parse a named-window definition (name AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER (or dialect equivalent) follows -- nothing to do.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> form: reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE BETWEEN <bound> AND <bound>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) following `this`.

        Args:
            explicit: when True, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or any unreserved token usable as one."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, else fall back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, else fall back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, else fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR (or any allowed token) into a Var, else a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Advance over any non-reserved token and return it, else None."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a Var, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, else fall back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE, else fall back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*), else fall back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS, if one follows."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Not actually a placeholder -- put the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT * EXCEPT (...) column list (a bare column is allowed)."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT * REPLACE (...) expression list (a bare expr is allowed)."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a separator-delimited list using `parse_method` per item."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain, mapping operator tokens via `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized identifier list (parens optional if `optional`)."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a CSV list wrapped in parentheses (parens optional if `optional`)."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; require them unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, else a (possibly aliased) expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode [, mode ...]]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens joined by spaces.
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TO SAVEPOINT ...] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK token itself was consumed before this call.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] ..., defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse DROP [IF EXISTS] PARTITION (...) [, PARTITION (...)]."""
        return self.expression(
            exp.DropPartition,
            expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD CONSTRAINT / ADD FOREIGN KEY / ADD PRIMARY KEY action."""
        this = None
        # The introducing token (e.g. CONSTRAINT) was consumed by the caller.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD ... actions of an ALTER TABLE statement."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP ... actions of an ALTER TABLE statement."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement, falling back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only emit an AlterTable if all tokens were consumed; otherwise
            # re-parse the whole statement as an opaque Command below.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None for neither.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW trie, else a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form <name> {=|TO} <value>."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not a valid assignment -- rewind so the caller can try another form.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the dialect's SET trie, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement, falling back to a raw Command on leftovers."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Unconsumed tokens remain: re-parse the whole thing as a Command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return a Var for the first matching (possibly multi-word) option."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim as an opaque Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword from the remainder of the raw SQL text.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property block; definition continues past this excerpt."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5054 self._match(TokenType.R_PAREN) 5055 5056 self._match_r_paren() 5057 5058 return self.expression( 5059 exp.DictProperty, 5060 this=this, 5061 kind=kind.this if kind else None, 5062 settings=settings, 5063 ) 5064 5065 def _parse_dict_range(self, this: str) -> exp.DictRange: 5066 self._match_l_paren() 5067 has_min = self._match_text_seq("MIN") 5068 if has_min: 5069 min = self._parse_var() or self._parse_primary() 5070 self._match_text_seq("MAX") 5071 max = self._parse_var() or self._parse_primary() 5072 else: 5073 max = self._parse_var() or self._parse_primary() 5074 min = exp.Literal.number(0) 5075 self._match_r_paren() 5076 return self.expression(exp.DictRange, this=this, min=min, max=max) 5077 5078 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5079 index = self._index 5080 expression = self._parse_column() 5081 if not self._match(TokenType.IN): 5082 self._retreat(index - 1) 5083 return None 5084 iterator = self._parse_column() 5085 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5086 return self.expression( 5087 exp.Comprehension, 5088 this=this, 5089 expression=expression, 5090 iterator=iterator, 5091 condition=condition, 5092 ) 5093 5094 def _find_parser( 5095 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5096 ) -> t.Optional[t.Callable]: 5097 if not self._curr: 5098 return None 5099 5100 index = self._index 5101 this = [] 5102 while True: 5103 # The current token might be multiple words 5104 curr = self._curr.text.upper() 5105 key = curr.split(" ") 5106 this.append(curr) 5107 5108 self._advance() 5109 result, trie = in_trie(trie, key) 5110 if result == TrieResult.FAILED: 5111 break 5112 5113 if result == TrieResult.EXISTS: 5114 subparser = parsers[" ".join(this)] 5115 return subparser 5116 5117 self._retreat(index) 5118 return None 5119 5120 def _match(self, token_type, advance=True, expression=None): 
5121 if not self._curr: 5122 return None 5123 5124 if self._curr.token_type == token_type: 5125 if advance: 5126 self._advance() 5127 self._add_comments(expression) 5128 return True 5129 5130 return None 5131 5132 def _match_set(self, types, advance=True): 5133 if not self._curr: 5134 return None 5135 5136 if self._curr.token_type in types: 5137 if advance: 5138 self._advance() 5139 return True 5140 5141 return None 5142 5143 def _match_pair(self, token_type_a, token_type_b, advance=True): 5144 if not self._curr or not self._next: 5145 return None 5146 5147 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5148 if advance: 5149 self._advance(2) 5150 return True 5151 5152 return None 5153 5154 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5155 if not self._match(TokenType.L_PAREN, expression=expression): 5156 self.raise_error("Expecting (") 5157 5158 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5159 if not self._match(TokenType.R_PAREN, expression=expression): 5160 self.raise_error("Expecting )") 5161 5162 def _match_texts(self, texts, advance=True): 5163 if self._curr and self._curr.text.upper() in texts: 5164 if advance: 5165 self._advance() 5166 return True 5167 return False 5168 5169 def _match_text_seq(self, *texts, advance=True): 5170 index = self._index 5171 for text in texts: 5172 if self._curr and self._curr.text.upper() == text: 5173 self._advance() 5174 else: 5175 self._retreat(index) 5176 return False 5177 5178 if not advance: 5179 self._retreat(index) 5180 5181 return True 5182 5183 @t.overload 5184 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5185 ... 5186 5187 @t.overload 5188 def _replace_columns_with_dots( 5189 self, this: t.Optional[exp.Expression] 5190 ) -> t.Optional[exp.Expression]: 5191 ... 
5192 5193 def _replace_columns_with_dots(self, this): 5194 if isinstance(this, exp.Dot): 5195 exp.replace_children(this, self._replace_columns_with_dots) 5196 elif isinstance(this, exp.Column): 5197 exp.replace_children(this, self._replace_columns_with_dots) 5198 table = this.args.get("table") 5199 this = ( 5200 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5201 ) 5202 5203 return this 5204 5205 def _replace_lambda( 5206 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5207 ) -> t.Optional[exp.Expression]: 5208 if not node: 5209 return node 5210 5211 for column in node.find_all(exp.Column): 5212 if column.parts[0].name in lambda_variables: 5213 dot_or_id = column.to_dot() if column.table else column.this 5214 parent = column.parent 5215 5216 while isinstance(parent, exp.Dot): 5217 if not isinstance(parent.parent, exp.Dot): 5218 parent.replace(dot_or_id) 5219 break 5220 parent = parent.parent 5221 else: 5222 if column is node: 5223 node = dot_or_id 5224 else: 5225 column.replace(dot_or_id) 5226 return node 5227 5228 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5229 return [ 5230 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5231 for value in values 5232 if value 5233 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap expression from alternating key/value arguments.

    A single star argument (e.g. ``MAP(*)``) yields a StarMap instead.
    Arguments are consumed pairwise: even positions are keys, odd positions
    their values.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up (key, value) tuples; an odd argument count fails on args[i + 1],
    # matching the strict pairwise contract.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 
213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 } 220 221 SUBQUERY_PREDICATES = { 222 TokenType.ANY: exp.Any, 223 TokenType.ALL: exp.All, 224 TokenType.EXISTS: exp.Exists, 225 TokenType.SOME: exp.Any, 226 } 227 228 RESERVED_KEYWORDS = { 229 *Tokenizer.SINGLE_TOKENS.values(), 230 TokenType.SELECT, 231 } 232 233 DB_CREATABLES = { 234 TokenType.DATABASE, 235 TokenType.SCHEMA, 236 TokenType.TABLE, 237 TokenType.VIEW, 238 TokenType.DICTIONARY, 239 } 240 241 CREATABLES = { 242 TokenType.COLUMN, 243 TokenType.FUNCTION, 244 TokenType.INDEX, 245 TokenType.PROCEDURE, 246 *DB_CREATABLES, 247 } 248 249 # Tokens that can represent identifiers 250 ID_VAR_TOKENS = { 251 TokenType.VAR, 252 TokenType.ANTI, 253 TokenType.APPLY, 254 TokenType.ASC, 255 TokenType.AUTO_INCREMENT, 256 TokenType.BEGIN, 257 TokenType.CACHE, 258 TokenType.CASE, 259 TokenType.COLLATE, 260 TokenType.COMMAND, 261 TokenType.COMMENT, 262 TokenType.COMMIT, 263 TokenType.CONSTRAINT, 264 TokenType.DEFAULT, 265 TokenType.DELETE, 266 TokenType.DESC, 267 TokenType.DESCRIBE, 268 TokenType.DICTIONARY, 269 TokenType.DIV, 270 TokenType.END, 271 TokenType.EXECUTE, 272 TokenType.ESCAPE, 273 TokenType.FALSE, 274 TokenType.FIRST, 275 TokenType.FILTER, 276 TokenType.FORMAT, 277 TokenType.FULL, 278 TokenType.IS, 279 TokenType.ISNULL, 280 TokenType.INTERVAL, 281 TokenType.KEEP, 282 TokenType.KILL, 283 TokenType.LEFT, 284 TokenType.LOAD, 285 TokenType.MERGE, 286 TokenType.NATURAL, 287 TokenType.NEXT, 288 TokenType.OFFSET, 289 TokenType.ORDINALITY, 290 TokenType.OVERLAPS, 291 TokenType.OVERWRITE, 292 TokenType.PARTITION, 293 TokenType.PERCENT, 294 TokenType.PIVOT, 295 TokenType.PRAGMA, 296 TokenType.RANGE, 297 TokenType.REFERENCES, 298 TokenType.RIGHT, 299 TokenType.ROW, 300 TokenType.ROWS, 301 TokenType.SEMI, 302 TokenType.SET, 
303 TokenType.SETTINGS, 304 TokenType.SHOW, 305 TokenType.TEMPORARY, 306 TokenType.TOP, 307 TokenType.TRUE, 308 TokenType.UNIQUE, 309 TokenType.UNPIVOT, 310 TokenType.UPDATE, 311 TokenType.VOLATILE, 312 TokenType.WINDOW, 313 *CREATABLES, 314 *SUBQUERY_PREDICATES, 315 *TYPE_TOKENS, 316 *NO_PAREN_FUNCTIONS, 317 } 318 319 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 320 321 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 322 TokenType.ANTI, 323 TokenType.APPLY, 324 TokenType.ASOF, 325 TokenType.FULL, 326 TokenType.LEFT, 327 TokenType.LOCK, 328 TokenType.NATURAL, 329 TokenType.OFFSET, 330 TokenType.RIGHT, 331 TokenType.SEMI, 332 TokenType.WINDOW, 333 } 334 335 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 336 337 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 338 339 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 340 341 FUNC_TOKENS = { 342 TokenType.COMMAND, 343 TokenType.CURRENT_DATE, 344 TokenType.CURRENT_DATETIME, 345 TokenType.CURRENT_TIMESTAMP, 346 TokenType.CURRENT_TIME, 347 TokenType.CURRENT_USER, 348 TokenType.FILTER, 349 TokenType.FIRST, 350 TokenType.FORMAT, 351 TokenType.GLOB, 352 TokenType.IDENTIFIER, 353 TokenType.INDEX, 354 TokenType.ISNULL, 355 TokenType.ILIKE, 356 TokenType.INSERT, 357 TokenType.LIKE, 358 TokenType.MERGE, 359 TokenType.OFFSET, 360 TokenType.PRIMARY_KEY, 361 TokenType.RANGE, 362 TokenType.REPLACE, 363 TokenType.RLIKE, 364 TokenType.ROW, 365 TokenType.UNNEST, 366 TokenType.VAR, 367 TokenType.LEFT, 368 TokenType.RIGHT, 369 TokenType.DATE, 370 TokenType.DATETIME, 371 TokenType.TABLE, 372 TokenType.TIMESTAMP, 373 TokenType.TIMESTAMPTZ, 374 TokenType.WINDOW, 375 TokenType.XOR, 376 *TYPE_TOKENS, 377 *SUBQUERY_PREDICATES, 378 } 379 380 CONJUNCTION = { 381 TokenType.AND: exp.And, 382 TokenType.OR: exp.Or, 383 } 384 385 EQUALITY = { 386 TokenType.EQ: exp.EQ, 387 TokenType.NEQ: exp.NEQ, 388 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 389 } 390 391 COMPARISON = { 392 TokenType.GT: exp.GT, 393 TokenType.GTE: exp.GTE, 394 
TokenType.LT: exp.LT, 395 TokenType.LTE: exp.LTE, 396 } 397 398 BITWISE = { 399 TokenType.AMP: exp.BitwiseAnd, 400 TokenType.CARET: exp.BitwiseXor, 401 TokenType.PIPE: exp.BitwiseOr, 402 TokenType.DPIPE: exp.DPipe, 403 } 404 405 TERM = { 406 TokenType.DASH: exp.Sub, 407 TokenType.PLUS: exp.Add, 408 TokenType.MOD: exp.Mod, 409 TokenType.COLLATE: exp.Collate, 410 } 411 412 FACTOR = { 413 TokenType.DIV: exp.IntDiv, 414 TokenType.LR_ARROW: exp.Distance, 415 TokenType.SLASH: exp.Div, 416 TokenType.STAR: exp.Mul, 417 } 418 419 TIMES = { 420 TokenType.TIME, 421 TokenType.TIMETZ, 422 } 423 424 TIMESTAMPS = { 425 TokenType.TIMESTAMP, 426 TokenType.TIMESTAMPTZ, 427 TokenType.TIMESTAMPLTZ, 428 *TIMES, 429 } 430 431 SET_OPERATIONS = { 432 TokenType.UNION, 433 TokenType.INTERSECT, 434 TokenType.EXCEPT, 435 } 436 437 JOIN_METHODS = { 438 TokenType.NATURAL, 439 TokenType.ASOF, 440 } 441 442 JOIN_SIDES = { 443 TokenType.LEFT, 444 TokenType.RIGHT, 445 TokenType.FULL, 446 } 447 448 JOIN_KINDS = { 449 TokenType.INNER, 450 TokenType.OUTER, 451 TokenType.CROSS, 452 TokenType.SEMI, 453 TokenType.ANTI, 454 } 455 456 JOIN_HINTS: t.Set[str] = set() 457 458 LAMBDAS = { 459 TokenType.ARROW: lambda self, expressions: self.expression( 460 exp.Lambda, 461 this=self._replace_lambda( 462 self._parse_conjunction(), 463 {node.name for node in expressions}, 464 ), 465 expressions=expressions, 466 ), 467 TokenType.FARROW: lambda self, expressions: self.expression( 468 exp.Kwarg, 469 this=exp.var(expressions[0].name), 470 expression=self._parse_conjunction(), 471 ), 472 } 473 474 COLUMN_OPERATORS = { 475 TokenType.DOT: None, 476 TokenType.DCOLON: lambda self, this, to: self.expression( 477 exp.Cast if self.STRICT_CAST else exp.TryCast, 478 this=this, 479 to=to, 480 ), 481 TokenType.ARROW: lambda self, this, path: self.expression( 482 exp.JSONExtract, 483 this=this, 484 expression=path, 485 ), 486 TokenType.DARROW: lambda self, this, path: self.expression( 487 exp.JSONExtractScalar, 488 this=this, 489 
expression=path, 490 ), 491 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 492 exp.JSONBExtract, 493 this=this, 494 expression=path, 495 ), 496 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 497 exp.JSONBExtractScalar, 498 this=this, 499 expression=path, 500 ), 501 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 502 exp.JSONBContains, 503 this=this, 504 expression=key, 505 ), 506 } 507 508 EXPRESSION_PARSERS = { 509 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 510 exp.Column: lambda self: self._parse_column(), 511 exp.Condition: lambda self: self._parse_conjunction(), 512 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 513 exp.Expression: lambda self: self._parse_statement(), 514 exp.From: lambda self: self._parse_from(), 515 exp.Group: lambda self: self._parse_group(), 516 exp.Having: lambda self: self._parse_having(), 517 exp.Identifier: lambda self: self._parse_id_var(), 518 exp.Join: lambda self: self._parse_join(), 519 exp.Lambda: lambda self: self._parse_lambda(), 520 exp.Lateral: lambda self: self._parse_lateral(), 521 exp.Limit: lambda self: self._parse_limit(), 522 exp.Offset: lambda self: self._parse_offset(), 523 exp.Order: lambda self: self._parse_order(), 524 exp.Ordered: lambda self: self._parse_ordered(), 525 exp.Properties: lambda self: self._parse_properties(), 526 exp.Qualify: lambda self: self._parse_qualify(), 527 exp.Returning: lambda self: self._parse_returning(), 528 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 529 exp.Table: lambda self: self._parse_table_parts(), 530 exp.TableAlias: lambda self: self._parse_table_alias(), 531 exp.Where: lambda self: self._parse_where(), 532 exp.Window: lambda self: self._parse_named_window(), 533 exp.With: lambda self: self._parse_with(), 534 "JOIN_TYPE": lambda self: self._parse_join_parts(), 535 } 536 537 STATEMENT_PARSERS = { 538 TokenType.ALTER: lambda self: 
self._parse_alter(), 539 TokenType.BEGIN: lambda self: self._parse_transaction(), 540 TokenType.CACHE: lambda self: self._parse_cache(), 541 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 542 TokenType.COMMENT: lambda self: self._parse_comment(), 543 TokenType.CREATE: lambda self: self._parse_create(), 544 TokenType.DELETE: lambda self: self._parse_delete(), 545 TokenType.DESC: lambda self: self._parse_describe(), 546 TokenType.DESCRIBE: lambda self: self._parse_describe(), 547 TokenType.DROP: lambda self: self._parse_drop(), 548 TokenType.INSERT: lambda self: self._parse_insert(), 549 TokenType.KILL: lambda self: self._parse_kill(), 550 TokenType.LOAD: lambda self: self._parse_load(), 551 TokenType.MERGE: lambda self: self._parse_merge(), 552 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 553 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 554 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 555 TokenType.SET: lambda self: self._parse_set(), 556 TokenType.UNCACHE: lambda self: self._parse_uncache(), 557 TokenType.UPDATE: lambda self: self._parse_update(), 558 TokenType.USE: lambda self: self.expression( 559 exp.Use, 560 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 561 and exp.var(self._prev.text), 562 this=self._parse_table(schema=False), 563 ), 564 } 565 566 UNARY_PARSERS = { 567 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 568 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 569 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 570 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 571 } 572 573 PRIMARY_PARSERS = { 574 TokenType.STRING: lambda self, token: self.expression( 575 exp.Literal, this=token.text, is_string=True 576 ), 577 TokenType.NUMBER: lambda self, token: self.expression( 578 exp.Literal, 
this=token.text, is_string=False 579 ), 580 TokenType.STAR: lambda self, _: self.expression( 581 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 582 ), 583 TokenType.NULL: lambda self, _: self.expression(exp.Null), 584 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 585 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 586 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 587 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 588 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 589 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 590 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 591 exp.National, this=token.text 592 ), 593 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 594 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 595 exp.RawString, this=token.text 596 ), 597 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 598 } 599 600 PLACEHOLDER_PARSERS = { 601 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 602 TokenType.PARAMETER: lambda self: self._parse_parameter(), 603 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 604 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 605 else None, 606 } 607 608 RANGE_PARSERS = { 609 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 610 TokenType.GLOB: binary_range_parser(exp.Glob), 611 TokenType.ILIKE: binary_range_parser(exp.ILike), 612 TokenType.IN: lambda self, this: self._parse_in(this), 613 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 614 TokenType.IS: lambda self, this: self._parse_is(this), 615 TokenType.LIKE: binary_range_parser(exp.Like), 616 TokenType.OVERLAPS: 
binary_range_parser(exp.Overlaps), 617 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 618 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 619 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 620 } 621 622 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 623 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 624 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 625 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 626 "CHARACTER SET": lambda self: self._parse_character_set(), 627 "CHECKSUM": lambda self: self._parse_checksum(), 628 "CLUSTER BY": lambda self: self._parse_cluster(), 629 "CLUSTERED": lambda self: self._parse_clustered_by(), 630 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 631 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 632 "COPY": lambda self: self._parse_copy_property(), 633 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 634 "DEFINER": lambda self: self._parse_definer(), 635 "DETERMINISTIC": lambda self: self.expression( 636 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 637 ), 638 "DISTKEY": lambda self: self._parse_distkey(), 639 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 640 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 641 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 642 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 643 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 644 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 645 "FREESPACE": lambda self: self._parse_freespace(), 646 "HEAP": lambda self: self.expression(exp.HeapProperty), 647 "IMMUTABLE": lambda self: self.expression( 648 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 649 ), 
650 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 651 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 652 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 653 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 654 "LIKE": lambda self: self._parse_create_like(), 655 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 656 "LOCK": lambda self: self._parse_locking(), 657 "LOCKING": lambda self: self._parse_locking(), 658 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 659 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 660 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 661 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 662 "NO": lambda self: self._parse_no_property(), 663 "ON": lambda self: self._parse_on_property(), 664 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 665 "PARTITION BY": lambda self: self._parse_partitioned_by(), 666 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 667 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 668 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 669 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 670 "RETURNS": lambda self: self._parse_returns(), 671 "ROW": lambda self: self._parse_row(), 672 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 673 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 674 "SETTINGS": lambda self: self.expression( 675 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 676 ), 677 "SORTKEY": lambda self: self._parse_sortkey(), 678 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 679 "STABLE": lambda self: self.expression( 680 exp.StabilityProperty, this=exp.Literal.string("STABLE") 681 ), 682 "STORED": lambda self: self._parse_stored(), 683 
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> callback that parses the constraint's arguments and
    # returns the corresponding constraint expression.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> produces OnUpdateColumnConstraint; a bare ON <id> falls
        # back to OnProperty.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE action keyword -> parser callback.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint kinds that (per the attribute name) may appear unnamed in a schema;
    # all of these keys also exist in CONSTRAINT_PARSERS.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Keyword -> parser for functions that are invoked without parentheses
    # (per the attribute name), e.g. CASE and IF expressions.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that are not allowed as function names.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function name -> dedicated argument parser, for functions whose argument
    # syntax is not a plain comma-separated expression list (CAST, EXTRACT, TRIM, ...).
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Leading token -> callback returning a (modifier key, parsed value) pair for
    # query modifiers such as WHERE, GROUP BY, ORDER BY, LIMIT, locks, samples, etc.
    # Note FETCH shares the "limit" slot with LIMIT.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET <scope>/<kind> keyword -> parser for the rest of the SET item.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects populate this (used to build SHOW_TRIE in _Parser.__new__).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Data type -> callback converting a literal of that type into an expression.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that can carry query modifiers.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can start the SELECT part of a DDL statement (see _parse_create).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that, when found right before VOLATILE, make it a VolatileProperty
    # (see _parse_volatile_property).
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted after INSERT OR ... (see _parse_insert).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    # Tokens that can introduce a table index hint (FORCE/IGNORE/USE INDEX).
    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    # Tokens usable as a window alias; ROWS is excluded since it starts a frame spec.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    # Tokens usable inside a FETCH clause; ROW/ROWS/PERCENT are FETCH keywords themselves.
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    # The following flags are dialect-tuning knobs; subclasses override them.

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments.
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Resets the parser's mutable state so the instance can parse another input."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed successfully; surface all attempts.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        # Split the token stream on semicolons, producing one chunk per statement.
        # The semicolon tokens themselves are dropped.
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Build the error message with up to error_message_context characters of SQL
        # on each side of the offending token; the token itself is underlined (ANSI).
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, attach any comments buffered from the
        # previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Attaches the buffered comments of the previous token, then clears the buffer
        # so they are not attached twice.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanning the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev token views and the
        # previous token's comment buffer.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or advances) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback parser: wraps the previous keyword plus the rest of the statement
        # in a generic Command expression.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <object> IS <string>; falls back to a
        # generic command when the object kind isn't a known creatable.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action
            # (DELETE / RECOMPRESS / TO DISK / TO VOLUME).
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the leading token, fall back
        # to a generic command, and finally try a plain expression / SELECT.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> ...; unknown kinds fall back
        # to a generic command.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only if the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several locations; merge them into one container.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Optional CLONE/COPY clause, e.g. Snowflake's CREATE TABLE ... CLONE src.
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that actually matched; a parser that doesn't
                # accept them raises TypeError, which we report as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Tries the registered property parsers and a few special forms; otherwise
        # falls back to a generic <key> = <value> property, backtracking on failure.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED AS <format> or STORED AS INPUTFORMAT ... OUTPUTFORMAT ...
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Consumes an optional "=" or alias keyword and wraps the value in exp_class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Greedily parses consecutive properties; `before` selects the Teradata-style
        # pre-name property grammar.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is a table property; elsewhere it
        # denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatches the various WITH ... property forms.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF [DEFAULT]; `on` stays None if neither keyword matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Only COPY GRANTS is a property; otherwise back off the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING <kind> [<object>] FOR|IN <lock type> [OVERRIDE]; every piece is optional
        # and unmatched pieces stay None.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
lock_type = "EXCLUSIVE" 1689 elif self._match_text_seq("SHARE"): 1690 lock_type = "SHARE" 1691 elif self._match_text_seq("READ"): 1692 lock_type = "READ" 1693 elif self._match_text_seq("WRITE"): 1694 lock_type = "WRITE" 1695 elif self._match_text_seq("CHECKSUM"): 1696 lock_type = "CHECKSUM" 1697 else: 1698 lock_type = None 1699 1700 override = self._match_text_seq("OVERRIDE") 1701 1702 return self.expression( 1703 exp.LockingProperty, 1704 this=this, 1705 kind=kind, 1706 for_or_in=for_or_in, 1707 lock_type=lock_type, 1708 override=override, 1709 ) 1710 1711 def _parse_partition_by(self) -> t.List[exp.Expression]: 1712 if self._match(TokenType.PARTITION_BY): 1713 return self._parse_csv(self._parse_conjunction) 1714 return [] 1715 1716 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1717 self._match(TokenType.EQ) 1718 return self.expression( 1719 exp.PartitionedByProperty, 1720 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1721 ) 1722 1723 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1724 if self._match_text_seq("AND", "STATISTICS"): 1725 statistics = True 1726 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1727 statistics = False 1728 else: 1729 statistics = None 1730 1731 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1732 1733 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1734 if self._match_text_seq("PRIMARY", "INDEX"): 1735 return exp.NoPrimaryIndexProperty() 1736 return None 1737 1738 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1739 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1740 return exp.OnCommitProperty() 1741 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1742 return exp.OnCommitProperty(delete=True) 1743 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1744 1745 def _parse_distkey(self) -> exp.DistKeyProperty: 1746 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 1747 1748 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1749 table = self._parse_table(schema=True) 1750 1751 options = [] 1752 while self._match_texts(("INCLUDING", "EXCLUDING")): 1753 this = self._prev.text.upper() 1754 1755 id_var = self._parse_id_var() 1756 if not id_var: 1757 return None 1758 1759 options.append( 1760 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1761 ) 1762 1763 return self.expression(exp.LikeProperty, this=table, expressions=options) 1764 1765 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1766 return self.expression( 1767 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1768 ) 1769 1770 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1771 self._match(TokenType.EQ) 1772 return self.expression( 1773 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1774 ) 1775 1776 def _parse_returns(self) -> exp.ReturnsProperty: 1777 value: t.Optional[exp.Expression] 1778 is_table = self._match(TokenType.TABLE) 1779 1780 if is_table: 1781 if self._match(TokenType.LT): 1782 value = self.expression( 1783 exp.Schema, 1784 this="TABLE", 1785 expressions=self._parse_csv(self._parse_struct_types), 1786 ) 1787 if not self._match(TokenType.GT): 1788 self.raise_error("Expecting >") 1789 else: 1790 value = self._parse_schema(exp.var("TABLE")) 1791 else: 1792 value = self._parse_types() 1793 1794 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1795 1796 def _parse_describe(self) -> exp.Describe: 1797 kind = self._match_set(self.CREATABLES) and self._prev.text 1798 this = self._parse_table(schema=True) 1799 properties = self._parse_properties() 1800 expressions = properties.expressions if properties else None 1801 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1802 1803 def _parse_insert(self) -> 
    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL statement: KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (postgres/sqlite) or ON DUPLICATE KEY (mysql); None when neither follows."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Either a named constraint or a conflict-target key list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause, optionally with INTO <target> (Oracle-style)."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... when the ROW token has already been consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<class>' [WITH SERDEPROPERTIES (...)] or DELIMITED ...

        Args:
            match_row: when True, require and consume the leading ROW FORMAT tokens.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional and order-dependent.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... (Hive); fall back to an opaque Command otherwise."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including MySQL multi-table targets and USING/RETURNING."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may legally appear before or after WHERE depending on dialect.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target, SET list, then FROM/WHERE/RETURNING/ORDER/LIMIT."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single ('key' = 'value') pair, stored as [key, value].
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse a PARTITION (...) clause; None when the PARTITION token does not follow."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, always wrapped in a Tuple (even a single bare expression)."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, SELECT core, parenthesized subqueries, VALUES,
        and duckdb's leading FROM; returns None when no query form matches.

        Args:
            nested: whether this call is parsing a nested (parenthesized) query.
            table: whether a bare table reference is acceptable inside parentheses.
            parse_subquery_alias: whether to consume a trailing alias on a subquery.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare leading FROM (duckdb): implicit SELECT *
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs; None when WITH does not follow."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> [AS] (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2179 ) 2180 2181 def _parse_table_alias( 2182 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2183 ) -> t.Optional[exp.TableAlias]: 2184 any_token = self._match(TokenType.ALIAS) 2185 alias = ( 2186 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2187 or self._parse_string_as_identifier() 2188 ) 2189 2190 index = self._index 2191 if self._match(TokenType.L_PAREN): 2192 columns = self._parse_csv(self._parse_function_parameter) 2193 self._match_r_paren() if columns else self._retreat(index) 2194 else: 2195 columns = None 2196 2197 if not alias and not columns: 2198 return None 2199 2200 return self.expression(exp.TableAlias, this=alias, columns=columns) 2201 2202 def _parse_subquery( 2203 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2204 ) -> t.Optional[exp.Subquery]: 2205 if not this: 2206 return None 2207 2208 return self.expression( 2209 exp.Subquery, 2210 this=this, 2211 pivots=self._parse_pivots(), 2212 alias=self._parse_table_alias() if parse_alias else None, 2213 ) 2214 2215 def _parse_query_modifiers( 2216 self, this: t.Optional[exp.Expression] 2217 ) -> t.Optional[exp.Expression]: 2218 if isinstance(this, self.MODIFIABLES): 2219 for join in iter(self._parse_join, None): 2220 this.append("joins", join) 2221 for lateral in iter(self._parse_lateral, None): 2222 this.append("laterals", lateral) 2223 2224 while True: 2225 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2226 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2227 key, expression = parser(self) 2228 2229 if expression: 2230 this.set(key, expression) 2231 if key == "limit": 2232 offset = expression.args.pop("offset", None) 2233 if offset: 2234 this.set("offset", exp.Offset(expression=offset)) 2235 continue 2236 break 2237 return this 2238 2239 def _parse_hint(self) -> t.Optional[exp.Hint]: 2240 if self._match(TokenType.HINT): 2241 
hints = [] 2242 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2243 hints.extend(hint) 2244 2245 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2246 self.raise_error("Expected */ after HINT") 2247 2248 return self.expression(exp.Hint, expressions=hints) 2249 2250 return None 2251 2252 def _parse_into(self) -> t.Optional[exp.Into]: 2253 if not self._match(TokenType.INTO): 2254 return None 2255 2256 temp = self._match(TokenType.TEMPORARY) 2257 unlogged = self._match_text_seq("UNLOGGED") 2258 self._match(TokenType.TABLE) 2259 2260 return self.expression( 2261 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2262 ) 2263 2264 def _parse_from( 2265 self, joins: bool = False, skip_from_token: bool = False 2266 ) -> t.Optional[exp.From]: 2267 if not skip_from_token and not self._match(TokenType.FROM): 2268 return None 2269 2270 return self.expression( 2271 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2272 ) 2273 2274 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2275 if not self._match(TokenType.MATCH_RECOGNIZE): 2276 return None 2277 2278 self._match_l_paren() 2279 2280 partition = self._parse_partition_by() 2281 order = self._parse_order() 2282 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2283 2284 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2285 rows = exp.var("ONE ROW PER MATCH") 2286 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2287 text = "ALL ROWS PER MATCH" 2288 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2289 text += f" SHOW EMPTY MATCHES" 2290 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2291 text += f" OMIT EMPTY MATCHES" 2292 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2293 text += f" WITH UNMATCHED ROWS" 2294 rows = exp.var(text) 2295 else: 2296 rows = None 2297 2298 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2299 text = "AFTER MATCH SKIP" 2300 
if self._match_text_seq("PAST", "LAST", "ROW"): 2301 text += f" PAST LAST ROW" 2302 elif self._match_text_seq("TO", "NEXT", "ROW"): 2303 text += f" TO NEXT ROW" 2304 elif self._match_text_seq("TO", "FIRST"): 2305 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2306 elif self._match_text_seq("TO", "LAST"): 2307 text += f" TO LAST {self._advance_any().text}" # type: ignore 2308 after = exp.var(text) 2309 else: 2310 after = None 2311 2312 if self._match_text_seq("PATTERN"): 2313 self._match_l_paren() 2314 2315 if not self._curr: 2316 self.raise_error("Expecting )", self._curr) 2317 2318 paren = 1 2319 start = self._curr 2320 2321 while self._curr and paren > 0: 2322 if self._curr.token_type == TokenType.L_PAREN: 2323 paren += 1 2324 if self._curr.token_type == TokenType.R_PAREN: 2325 paren -= 1 2326 2327 end = self._prev 2328 self._advance() 2329 2330 if paren > 0: 2331 self.raise_error("Expecting )", self._curr) 2332 2333 pattern = exp.var(self._find_sql(start, end)) 2334 else: 2335 pattern = None 2336 2337 define = ( 2338 self._parse_csv( 2339 lambda: self.expression( 2340 exp.Alias, 2341 alias=self._parse_id_var(any_token=True), 2342 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2343 ) 2344 ) 2345 if self._match_text_seq("DEFINE") 2346 else None 2347 ) 2348 2349 self._match_r_paren() 2350 2351 return self.expression( 2352 exp.MatchRecognize, 2353 partition_by=partition, 2354 order=order, 2355 measures=measures, 2356 rows=rows, 2357 after=after, 2358 pattern=pattern, 2359 define=define, 2360 alias=self._parse_table_alias(), 2361 ) 2362 2363 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2364 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2365 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2366 2367 if outer_apply or cross_apply: 2368 this = self._parse_select(table=True) 2369 view = None 2370 outer = not cross_apply 2371 elif self._match(TokenType.LATERAL): 2372 this = 
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional (method, side, kind) tokens that prefix JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a join: comma join, [method] [side] [kind] JOIN, or CROSS/OUTER APPLY.

        Args:
            skip_join_token: when True, the JOIN keyword itself is not required.
            parse_bracket: forwarded to the joined table parser.
        """
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword: backtrack and discard the speculatively consumed parts.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is semantically a LEFT join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins whose ON/USING belongs to the outer join;
            # backtrack if no ON/USING follows the nested joins.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a postgres operator class name."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            # The next word is a keyword (e.g. ASC), not an opclass name.
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse a CREATE INDEX body or an index reference.

        Args:
            index: an already-parsed index name; when given, only ON <table> etc. is parsed.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # Teradata

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL USE/FORCE/IGNORE INDEX hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )
    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dotted table name into (catalog, db, table), nesting extra parts as Dot."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, VALUES, subquery, or a table name
        with optional version, alias, hints, pivots, sample and joins.

        Args:
            schema: parse the table as a schema target (column defs allowed).
            joins: also consume trailing joins.
            alias_tokens: token types permitted as the alias.
            parse_bracket: allow a bracket expression as the table.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialects differ on whether the alias precedes or follows TABLESAMPLE.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table it applies to.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause: FOR SYSTEM_TIME/VERSION AS OF, BETWEEN,
        CONTAINED IN, or ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                # e.g. BigQuery: the alias names the produced column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra trailing column alias names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES table, bare or parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE clause (or duckdb USING SAMPLE when `as_modifier`)."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = self._parse_primary()

        if self._match_text_seq("BUCKET"):
            # Hive: BUCKET x OUT OF y [ON col]
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses; None when there are none."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Parse zero or more consecutive joins; None when there are none."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse duckdb's simplified PIVOT statement: PIVOT <table> ON ... USING ... GROUP BY ..."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause; None (with backtracking) when not present."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT without "(" is not this construct: rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names the pivot produces.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Column-name prefixes contributed by the pivot's aggregations (their aliases)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None when WHERE does not follow (unless pre-consumed)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE / WITH TOTALS have no parenthesized args.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...); None when the keyword does not follow."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
self._parse_csv(self._parse_column) 2938 self._match_r_paren() 2939 return self.expression(exp.Tuple, expressions=grouping_set) 2940 2941 return self._parse_column() 2942 2943 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2944 if not skip_having_token and not self._match(TokenType.HAVING): 2945 return None 2946 return self.expression(exp.Having, this=self._parse_conjunction()) 2947 2948 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2949 if not self._match(TokenType.QUALIFY): 2950 return None 2951 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2952 2953 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2954 if skip_start_token: 2955 start = None 2956 elif self._match(TokenType.START_WITH): 2957 start = self._parse_conjunction() 2958 else: 2959 return None 2960 2961 self._match(TokenType.CONNECT_BY) 2962 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2963 exp.Prior, this=self._parse_bitwise() 2964 ) 2965 connect = self._parse_conjunction() 2966 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2967 2968 if not start and self._match(TokenType.START_WITH): 2969 start = self._parse_conjunction() 2970 2971 return self.expression(exp.Connect, start=start, connect=connect) 2972 2973 def _parse_order( 2974 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2975 ) -> t.Optional[exp.Expression]: 2976 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2977 return this 2978 2979 return self.expression( 2980 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2981 ) 2982 2983 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2984 if not self._match(token): 2985 return None 2986 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2987 2988 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 2989 this = 
parse_method() if parse_method else self._parse_conjunction() 2990 2991 asc = self._match(TokenType.ASC) 2992 desc = self._match(TokenType.DESC) or (asc and False) 2993 2994 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2995 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2996 2997 nulls_first = is_nulls_first or False 2998 explicitly_null_ordered = is_nulls_first or is_nulls_last 2999 3000 if ( 3001 not explicitly_null_ordered 3002 and ( 3003 (not desc and self.NULL_ORDERING == "nulls_are_small") 3004 or (desc and self.NULL_ORDERING != "nulls_are_small") 3005 ) 3006 and self.NULL_ORDERING != "nulls_are_last" 3007 ): 3008 nulls_first = True 3009 3010 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3011 3012 def _parse_limit( 3013 self, this: t.Optional[exp.Expression] = None, top: bool = False 3014 ) -> t.Optional[exp.Expression]: 3015 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3016 comments = self._prev_comments 3017 if top: 3018 limit_paren = self._match(TokenType.L_PAREN) 3019 expression = self._parse_number() 3020 3021 if limit_paren: 3022 self._match_r_paren() 3023 else: 3024 expression = self._parse_term() 3025 3026 if self._match(TokenType.COMMA): 3027 offset = expression 3028 expression = self._parse_term() 3029 else: 3030 offset = None 3031 3032 limit_exp = self.expression( 3033 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3034 ) 3035 3036 return limit_exp 3037 3038 if self._match(TokenType.FETCH): 3039 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3040 direction = self._prev.text if direction else "FIRST" 3041 3042 count = self._parse_field(tokens=self.FETCH_TOKENS) 3043 percent = self._match(TokenType.PERCENT) 3044 3045 self._match_set((TokenType.ROW, TokenType.ROWS)) 3046 3047 only = self._match_text_seq("ONLY") 3048 with_ties = self._match_text_seq("WITH", "TIES") 3049 3050 if only and with_ties: 3051 self.raise_error("Cannot specify 
both ONLY and WITH TIES in FETCH clause")

            # NOTE(review): `only` is validated above but not stored on the
            # Fetch node — presumably ONLY is the implied default; confirm.
            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW | ROWS]; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse any number of locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT n.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains (right-recursive) off of `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is written.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a projection-level expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =, <>, etc. chains."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse <, <=, >, >= chains."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN, IN, LIKE, ...), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] {DISTINCT FROM | NULL | TRUE | FALSE}."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all — rewind past the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of IN: UNNEST(...), a (sub)query/tuple, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the tail of BETWEEN low AND high."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low,
high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node when an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal; backtracks and returns None on mismatch."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # '5 day' → value '5' with unit `day`.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, ?? coalescing, and << / >> shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                # << is tokenized as two LT tokens.
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then AT TIME ZONE over a typed primary."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse a value that may be prefixed by a type (e.g. DATE '2020-01-01')."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # TYPE 'literal' — dialect hook first, generic cast otherwise.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: treat as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter inside a type's parens, e.g. the 10 in DECIMAL(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type; None on mismatch."""
        index = self._index

        # Teradata SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier: it may itself spell a type name.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if
               is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all — rewind completely.
                self._retreat(index)
                return None

            # TYPE(...) could equally be a function call; decided below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket form: ARRAY<...>, STRUCT<...>, MAP<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL YEAR TO MONTH style span.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # TYPE(...) followed by a string literal is a function call
            # (e.g. DATE(...)), not a cast target — peek without consuming.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, one level per pair.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one `name: type` (or bare type) member of a STRUCT type."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone when AT TIME ZONE follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, promoting a bare identifier to Column."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse postfix column operators: brackets, ::casts, dots, JSON ops."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: a.b.c — previous parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary: literal, implicit string concat, .N float, or parens."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' → CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot float literal: .5 → 0.5.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments
 = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # (a, b, ...) is a tuple, not a paren-grouped expression.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, trying no-paren forms first, then FUNC(args)."""
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                # e.g. CURRENT_DATE without parentheses.
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume both the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # EXISTS(SELECT ...), ANY(SELECT ...), etc.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling for faithful output.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a UDF signature (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'), else a bare identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr), a DISTINCT arg list, or an expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a subquery first; roll back state either way.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression])
-> t.Optional[exp.Expression]: 3723 # column defs are not really columns, they're identifiers 3724 if isinstance(this, exp.Column): 3725 this = this.this 3726 3727 kind = self._parse_types(schema=True) 3728 3729 if self._match_text_seq("FOR", "ORDINALITY"): 3730 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3731 3732 constraints: t.List[exp.Expression] = [] 3733 3734 if not kind and self._match(TokenType.ALIAS): 3735 constraints.append( 3736 self.expression( 3737 exp.ComputedColumnConstraint, 3738 this=self._parse_conjunction(), 3739 persisted=self._match_text_seq("PERSISTED"), 3740 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3741 ) 3742 ) 3743 3744 while True: 3745 constraint = self._parse_column_constraint() 3746 if not constraint: 3747 break 3748 constraints.append(constraint) 3749 3750 if not kind and not constraints: 3751 return this 3752 3753 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3754 3755 def _parse_auto_increment( 3756 self, 3757 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3758 start = None 3759 increment = None 3760 3761 if self._match(TokenType.L_PAREN, advance=False): 3762 args = self._parse_wrapped_csv(self._parse_bitwise) 3763 start = seq_get(args, 0) 3764 increment = seq_get(args, 1) 3765 elif self._match_text_seq("START"): 3766 start = self._parse_bitwise() 3767 self._match_text_seq("INCREMENT") 3768 increment = self._parse_bitwise() 3769 3770 if start and increment: 3771 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3772 3773 return exp.AutoIncrementColumnConstraint() 3774 3775 def _parse_compress(self) -> exp.CompressColumnConstraint: 3776 if self._match(TokenType.L_PAREN, advance=False): 3777 return self.expression( 3778 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3779 ) 3780 3781 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse `GENERATED {BY DEFAULT [ON NULL] | ALWAYS} AS {IDENTITY [(opts)] | (expr)}`.

        The GENERATED keyword itself is assumed to have been consumed by the caller.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            # BY DEFAULT variant: `this=False` distinguishes it from ALWAYS.
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            # ALWAYS is optional in some dialects, so match it best-effort.
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        # Remember whether IDENTITY was spelled out: without it, the parenthesized
        # part below is a computed expression rather than identity options.
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>): capture the computed expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
self._match(TokenType.CONSTRAINT): 3837 this = self._parse_id_var() 3838 else: 3839 this = None 3840 3841 if self._match_texts(self.CONSTRAINT_PARSERS): 3842 return self.expression( 3843 exp.ColumnConstraint, 3844 this=this, 3845 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3846 ) 3847 3848 return this 3849 3850 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3851 if not self._match(TokenType.CONSTRAINT): 3852 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3853 3854 this = self._parse_id_var() 3855 expressions = [] 3856 3857 while True: 3858 constraint = self._parse_unnamed_constraint() or self._parse_function() 3859 if not constraint: 3860 break 3861 expressions.append(constraint) 3862 3863 return self.expression(exp.Constraint, this=this, expressions=expressions) 3864 3865 def _parse_unnamed_constraint( 3866 self, constraints: t.Optional[t.Collection[str]] = None 3867 ) -> t.Optional[exp.Expression]: 3868 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3869 return None 3870 3871 constraint = self._prev.text.upper() 3872 if constraint not in self.CONSTRAINT_PARSERS: 3873 self.raise_error(f"No parser found for schema constraint {constraint}.") 3874 3875 return self.CONSTRAINT_PARSERS[constraint](self) 3876 3877 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3878 self._match_text_seq("KEY") 3879 return self.expression( 3880 exp.UniqueColumnConstraint, 3881 this=self._parse_schema(self._parse_id_var(any_token=False)), 3882 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3883 ) 3884 3885 def _parse_key_constraint_options(self) -> t.List[str]: 3886 options = [] 3887 while True: 3888 if not self._curr: 3889 break 3890 3891 if self._match(TokenType.ON): 3892 action = None 3893 on = self._advance_any() and self._prev.text 3894 3895 if self._match_text_seq("NO", "ACTION"): 3896 action = "NO ACTION" 3897 elif self._match_text_seq("CASCADE"): 3898 
action = "CASCADE" 3899 elif self._match_text_seq("RESTRICT"): 3900 action = "RESTRICT" 3901 elif self._match_pair(TokenType.SET, TokenType.NULL): 3902 action = "SET NULL" 3903 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3904 action = "SET DEFAULT" 3905 else: 3906 self.raise_error("Invalid key constraint") 3907 3908 options.append(f"ON {on} {action}") 3909 elif self._match_text_seq("NOT", "ENFORCED"): 3910 options.append("NOT ENFORCED") 3911 elif self._match_text_seq("DEFERRABLE"): 3912 options.append("DEFERRABLE") 3913 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3914 options.append("INITIALLY DEFERRED") 3915 elif self._match_text_seq("NORELY"): 3916 options.append("NORELY") 3917 elif self._match_text_seq("MATCH", "FULL"): 3918 options.append("MATCH FULL") 3919 else: 3920 break 3921 3922 return options 3923 3924 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3925 if match and not self._match(TokenType.REFERENCES): 3926 return None 3927 3928 expressions = None 3929 this = self._parse_table(schema=True) 3930 options = self._parse_key_constraint_options() 3931 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3932 3933 def _parse_foreign_key(self) -> exp.ForeignKey: 3934 expressions = self._parse_wrapped_id_vars() 3935 reference = self._parse_references() 3936 options = {} 3937 3938 while self._match(TokenType.ON): 3939 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3940 self.raise_error("Expected DELETE or UPDATE") 3941 3942 kind = self._prev.text.lower() 3943 3944 if self._match_text_seq("NO", "ACTION"): 3945 action = "NO ACTION" 3946 elif self._match(TokenType.SET): 3947 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3948 action = "SET " + self._prev.text.upper() 3949 else: 3950 self._advance() 3951 action = self._prev.text.upper() 3952 3953 options[kind] = action 3954 3955 return self.expression( 3956 exp.ForeignKey, expressions=expressions, 
reference=reference, **options # type: ignore 3957 ) 3958 3959 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3960 return self._parse_field() 3961 3962 def _parse_primary_key( 3963 self, wrapped_optional: bool = False, in_props: bool = False 3964 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3965 desc = ( 3966 self._match_set((TokenType.ASC, TokenType.DESC)) 3967 and self._prev.token_type == TokenType.DESC 3968 ) 3969 3970 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3971 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3972 3973 expressions = self._parse_wrapped_csv( 3974 self._parse_primary_key_part, optional=wrapped_optional 3975 ) 3976 options = self._parse_key_constraint_options() 3977 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3978 3979 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3980 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3981 return this 3982 3983 bracket_kind = self._prev.token_type 3984 3985 if self._match(TokenType.COLON): 3986 expressions: t.List[exp.Expression] = [ 3987 self.expression(exp.Slice, expression=self._parse_conjunction()) 3988 ] 3989 else: 3990 expressions = self._parse_csv( 3991 lambda: self._parse_slice( 3992 self._parse_alias(self._parse_conjunction(), explicit=True) 3993 ) 3994 ) 3995 3996 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3997 if bracket_kind == TokenType.L_BRACE: 3998 this = self.expression(exp.Struct, expressions=expressions) 3999 elif not this or this.name.upper() == "ARRAY": 4000 this = self.expression(exp.Array, expressions=expressions) 4001 else: 4002 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4003 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4004 4005 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4006 self.raise_error("Expected 
]") 4007 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4008 self.raise_error("Expected }") 4009 4010 self._add_comments(this) 4011 return self._parse_bracket(this) 4012 4013 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4014 if self._match(TokenType.COLON): 4015 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4016 return this 4017 4018 def _parse_case(self) -> t.Optional[exp.Expression]: 4019 ifs = [] 4020 default = None 4021 4022 comments = self._prev_comments 4023 expression = self._parse_conjunction() 4024 4025 while self._match(TokenType.WHEN): 4026 this = self._parse_conjunction() 4027 self._match(TokenType.THEN) 4028 then = self._parse_conjunction() 4029 ifs.append(self.expression(exp.If, this=this, true=then)) 4030 4031 if self._match(TokenType.ELSE): 4032 default = self._parse_conjunction() 4033 4034 if not self._match(TokenType.END): 4035 self.raise_error("Expected END after CASE", self._prev) 4036 4037 return self._parse_window( 4038 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4039 ) 4040 4041 def _parse_if(self) -> t.Optional[exp.Expression]: 4042 if self._match(TokenType.L_PAREN): 4043 args = self._parse_csv(self._parse_conjunction) 4044 this = self.validate_expression(exp.If.from_arg_list(args), args) 4045 self._match_r_paren() 4046 else: 4047 index = self._index - 1 4048 condition = self._parse_conjunction() 4049 4050 if not condition: 4051 self._retreat(index) 4052 return None 4053 4054 self._match(TokenType.THEN) 4055 true = self._parse_conjunction() 4056 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4057 self._match(TokenType.END) 4058 this = self.expression(exp.If, this=condition, true=true, false=false) 4059 4060 return self._parse_window(this) 4061 4062 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4063 if not self._match_text_seq("VALUE", "FOR"): 
4064 self._retreat(self._index - 1) 4065 return None 4066 4067 return self.expression( 4068 exp.NextValueFor, 4069 this=self._parse_column(), 4070 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4071 ) 4072 4073 def _parse_extract(self) -> exp.Extract: 4074 this = self._parse_function() or self._parse_var() or self._parse_type() 4075 4076 if self._match(TokenType.FROM): 4077 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4078 4079 if not self._match(TokenType.COMMA): 4080 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4081 4082 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4083 4084 def _parse_any_value(self) -> exp.AnyValue: 4085 this = self._parse_lambda() 4086 is_max = None 4087 having = None 4088 4089 if self._match(TokenType.HAVING): 4090 self._match_texts(("MAX", "MIN")) 4091 is_max = self._prev.text == "MAX" 4092 having = self._parse_column() 4093 4094 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4095 4096 def _parse_cast(self, strict: bool) -> exp.Expression: 4097 this = self._parse_conjunction() 4098 4099 if not self._match(TokenType.ALIAS): 4100 if self._match(TokenType.COMMA): 4101 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4102 4103 self.raise_error("Expected AS after CAST") 4104 4105 fmt = None 4106 to = self._parse_types() 4107 4108 if not to: 4109 self.raise_error("Expected TYPE after CAST") 4110 elif isinstance(to, exp.Identifier): 4111 to = exp.DataType.build(to.name, udt=True) 4112 elif to.this == exp.DataType.Type.CHAR: 4113 if self._match(TokenType.CHARACTER_SET): 4114 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4115 elif self._match(TokenType.FORMAT): 4116 fmt_string = self._parse_string() 4117 fmt = self._parse_at_time_zone(fmt_string) 4118 4119 if to.this in exp.DataType.TEMPORAL_TYPES: 4120 this = self.expression( 4121 
exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4122 this=this, 4123 format=exp.Literal.string( 4124 format_time( 4125 fmt_string.this if fmt_string else "", 4126 self.FORMAT_MAPPING or self.TIME_MAPPING, 4127 self.FORMAT_TRIE or self.TIME_TRIE, 4128 ) 4129 ), 4130 ) 4131 4132 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4133 this.set("zone", fmt.args["zone"]) 4134 4135 return this 4136 4137 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4138 4139 def _parse_concat(self) -> t.Optional[exp.Expression]: 4140 args = self._parse_csv(self._parse_conjunction) 4141 if self.CONCAT_NULL_OUTPUTS_STRING: 4142 args = self._ensure_string_if_null(args) 4143 4144 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4145 # we find such a call we replace it with its argument. 4146 if len(args) == 1: 4147 return args[0] 4148 4149 return self.expression( 4150 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4151 ) 4152 4153 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4154 args = self._parse_csv(self._parse_conjunction) 4155 if len(args) < 2: 4156 return self.expression(exp.ConcatWs, expressions=args) 4157 delim, *values = args 4158 if self.CONCAT_NULL_OUTPUTS_STRING: 4159 values = self._ensure_string_if_null(values) 4160 4161 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4162 4163 def _parse_string_agg(self) -> exp.Expression: 4164 if self._match(TokenType.DISTINCT): 4165 args: t.List[t.Optional[exp.Expression]] = [ 4166 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4167 ] 4168 if self._match(TokenType.COMMA): 4169 args.extend(self._parse_csv(self._parse_conjunction)) 4170 else: 4171 args = self._parse_csv(self._parse_conjunction) # type: ignore 4172 4173 index = self._index 4174 if not self._match(TokenType.R_PAREN) and args: 4175 # postgres: STRING_AGG([DISTINCT] expression, 
separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4176 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4177 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4178 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4179 4180 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4181 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4182 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4183 if not self._match_text_seq("WITHIN", "GROUP"): 4184 self._retreat(index) 4185 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4186 4187 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4188 order = self._parse_order(this=seq_get(args, 0)) 4189 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4190 4191 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4192 this = self._parse_bitwise() 4193 4194 if self._match(TokenType.USING): 4195 to: t.Optional[exp.Expression] = self.expression( 4196 exp.CharacterSet, this=self._parse_var() 4197 ) 4198 elif self._match(TokenType.COMMA): 4199 to = self._parse_types() 4200 else: 4201 to = None 4202 4203 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4204 4205 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4206 """ 4207 There are generally two variants of the DECODE function: 4208 4209 - DECODE(bin, charset) 4210 - DECODE(expression, search, result [, search, result] ... [, default]) 4211 4212 The second variant will always be parsed into a CASE expression. Note that NULL 4213 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4214 instead of relying on pattern matching. 
4215 """ 4216 args = self._parse_csv(self._parse_conjunction) 4217 4218 if len(args) < 3: 4219 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4220 4221 expression, *expressions = args 4222 if not expression: 4223 return None 4224 4225 ifs = [] 4226 for search, result in zip(expressions[::2], expressions[1::2]): 4227 if not search or not result: 4228 return None 4229 4230 if isinstance(search, exp.Literal): 4231 ifs.append( 4232 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4233 ) 4234 elif isinstance(search, exp.Null): 4235 ifs.append( 4236 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4237 ) 4238 else: 4239 cond = exp.or_( 4240 exp.EQ(this=expression.copy(), expression=search), 4241 exp.and_( 4242 exp.Is(this=expression.copy(), expression=exp.Null()), 4243 exp.Is(this=search.copy(), expression=exp.Null()), 4244 copy=False, 4245 ), 4246 copy=False, 4247 ) 4248 ifs.append(exp.If(this=cond, true=result)) 4249 4250 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4251 4252 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4253 self._match_text_seq("KEY") 4254 key = self._parse_column() 4255 self._match_set((TokenType.COLON, TokenType.COMMA)) 4256 self._match_text_seq("VALUE") 4257 value = self._parse_bitwise() 4258 4259 if not key and not value: 4260 return None 4261 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4262 4263 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4264 if not this or not self._match_text_seq("FORMAT", "JSON"): 4265 return this 4266 4267 return self.expression(exp.FormatJson, this=this) 4268 4269 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4270 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4271 for value in values: 4272 if self._match_text_seq(value, "ON", on): 4273 return f"{value} ON {on}" 4274 4275 return None 4276 4277 def _parse_json_object(self) -> exp.JSONObject: 4278 star = self._parse_star() 4279 expressions = ( 4280 [star] 4281 if star 4282 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4283 ) 4284 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4285 4286 unique_keys = None 4287 if self._match_text_seq("WITH", "UNIQUE"): 4288 unique_keys = True 4289 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4290 unique_keys = False 4291 4292 self._match_text_seq("KEYS") 4293 4294 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4295 self._parse_type() 4296 ) 4297 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4298 4299 return self.expression( 4300 exp.JSONObject, 4301 expressions=expressions, 4302 null_handling=null_handling, 4303 unique_keys=unique_keys, 4304 return_type=return_type, 4305 encoding=encoding, 4306 ) 4307 4308 def _parse_logarithm(self) -> exp.Func: 4309 # Default argument order is base, expression 4310 args = self._parse_csv(self._parse_range) 4311 4312 if len(args) > 1: 4313 if not self.LOG_BASE_FIRST: 4314 args.reverse() 4315 return exp.Log.from_arg_list(args) 4316 4317 return self.expression( 4318 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4319 ) 4320 4321 def _parse_match_against(self) -> exp.MatchAgainst: 4322 expressions = self._parse_csv(self._parse_column) 4323 4324 self._match_text_seq(")", "AGAINST", "(") 4325 4326 this = self._parse_string() 4327 4328 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4329 modifier = "IN NATURAL LANGUAGE MODE" 4330 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4331 modifier = f"{modifier} WITH QUERY EXPANSION" 4332 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4333 modifier = "IN BOOLEAN MODE" 4334 elif 
self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4335 modifier = "WITH QUERY EXPANSION" 4336 else: 4337 modifier = None 4338 4339 return self.expression( 4340 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4341 ) 4342 4343 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4344 def _parse_open_json(self) -> exp.OpenJSON: 4345 this = self._parse_bitwise() 4346 path = self._match(TokenType.COMMA) and self._parse_string() 4347 4348 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4349 this = self._parse_field(any_token=True) 4350 kind = self._parse_types() 4351 path = self._parse_string() 4352 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4353 4354 return self.expression( 4355 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4356 ) 4357 4358 expressions = None 4359 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4360 self._match_l_paren() 4361 expressions = self._parse_csv(_parse_open_json_column_def) 4362 4363 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4364 4365 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4366 args = self._parse_csv(self._parse_bitwise) 4367 4368 if self._match(TokenType.IN): 4369 return self.expression( 4370 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4371 ) 4372 4373 if haystack_first: 4374 haystack = seq_get(args, 0) 4375 needle = seq_get(args, 1) 4376 else: 4377 needle = seq_get(args, 0) 4378 haystack = seq_get(args, 1) 4379 4380 return self.expression( 4381 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4382 ) 4383 4384 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4385 args = self._parse_csv(self._parse_table) 4386 return exp.JoinHint(this=func_name.upper(), expressions=args) 4387 4388 def _parse_substring(self) -> exp.Substring: 4389 # Postgres supports the form: 
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes of a function call: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...) clauses.

        Args:
            this: the expression (typically a function call) being windowed.
            alias: when True, parse a named window definition (WINDOW x AS (...)).

        Returns:
            `this` wrapped in the applicable Filter/WithinGroup/Window nodes,
            or `this` unchanged when no window syntax follows.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows: nothing window-related to parse.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # `OVER <name>` without parens references a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # Oracle FIRST/LAST within KEEP (...): default to FIRST unless LAST is seen.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame specification: ROWS/RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
            # (tail of _parse_string, whose start is outside this chunk) —
            # dispatch to the registered STRING primary parser.
            self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # Treat a string literal as a quoted identifier; when the next token is
        # not a STRING, `False and ...` passes a falsy name to to_identifier
        # (presumably yielding None — TODO confirm against exp.to_identifier).
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        # A NUMBER literal, or a placeholder (e.g. ?, :name) in its place.
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        # A quoted identifier token, or a placeholder in its place.
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        # Parse a variable name. any_token=True accepts any non-reserved token;
        # `tokens` widens the set of accepted token types.
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume and return the current token unless it is a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parse a parameter reference; `wrapped` records whether it was
        # brace-delimited ({name}).
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        # Dispatch to a registered placeholder parser; rewind one token when it
        # produces nothing so the token can be re-interpreted by the caller.
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # EXCEPT (col, ...), or a single bare column after EXCEPT.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # REPLACE (expr, ...), or a single bare expression after REPLACE.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        # Parse a `sep`-separated list; comments seen at a separator are
        # attached to the expression that precedes it.
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold: a OP b OP c -> Op(Op(a, b), c); the operator
        # class is looked up per matched token type in `expressions`.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse parenthesized content; parentheses are mandatory unless
        # `optional` is set, and the closing paren is only required when the
        # opening one was present.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        # [kind] [TRANSACTION | WORK] [mode, ...] — each mode is a run of VAR
        # tokens joined with spaces.
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): a ROLLBACK discards the parsed `chain` flag, and COMMIT
        # discards `savepoint` — confirm this asymmetry is intentional.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER <column>]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        # Defaults the drop kind to COLUMN when _parse_drop didn't set one.
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        # `index` points at the token before this parser was dispatched, so a
        # failed constraint match can rewind and retry as a column add.
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        # Falls back to an opaque exp.Command whenever the statement cannot be
        # fully consumed by a registered ALTER action parser.
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, falsy otherwise.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # `global` is a Python keyword, so it must be passed via dict unpack.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        # If any tokens remain after parsing the SET items, rewind and treat
        # the whole statement as an opaque command.
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        # Options may be multi-word; each is matched as a space-split sequence.
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume every remaining token and wrap the raw SQL text in a Command:
        # `this` is the leading keyword, `expression` the rest.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        # RANGE(MIN a MAX b) or RANGE(MAX b) — the min defaults to 0 when only
        # a max is given. (Locals intentionally shadow builtins min/max.)
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        # `expr FOR x IN iterator [IF cond]`; the FOR token was consumed by the
        # caller, hence the retreat to index - 1 on failure.
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        # Walk the token stream against a word trie; on a full match return the
        # registered parser, otherwise rewind to where we started.
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns True and (optionally) advances when the current token matches
        # `token_type`; leading comments are moved onto `expression` if given.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types; advances past both on success.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive match of the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Match a sequence of token texts, case-insensitively; rewinds fully on
        # any mismatch (and also when advance=False, after a successful match).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        # Recursively rewrite Column nodes as Dot chains (table.column).
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        # Rewrite references to lambda parameters inside `node`: a Column whose
        # leading part names a lambda variable becomes a bare identifier (or a
        # Dot chain, if the column was qualified). The outermost enclosing Dot
        # is replaced, so the whole qualified reference is rewritten at once.
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        # Wrap each value in COALESCE(CAST(v AS text), '') so NULLs become
        # empty strings; falsy entries are dropped.
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
924 def __init__( 925 self, 926 error_level: t.Optional[ErrorLevel] = None, 927 error_message_context: int = 100, 928 max_errors: int = 3, 929 ): 930 self.error_level = error_level or ErrorLevel.IMMEDIATE 931 self.error_message_context = error_message_context 932 self.max_errors = max_errors 933 self._tokenizer = self.TOKENIZER_CLASS() 934 self.reset()
946 def parse( 947 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 948 ) -> t.List[t.Optional[exp.Expression]]: 949 """ 950 Parses a list of tokens and returns a list of syntax trees, one tree 951 per parsed SQL statement. 952 953 Args: 954 raw_tokens: The list of tokens. 955 sql: The original SQL string, used to produce helpful debug messages. 956 957 Returns: 958 The list of the produced syntax trees. 959 """ 960 return self._parse( 961 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 962 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
964 def parse_into( 965 self, 966 expression_types: exp.IntoType, 967 raw_tokens: t.List[Token], 968 sql: t.Optional[str] = None, 969 ) -> t.List[t.Optional[exp.Expression]]: 970 """ 971 Parses a list of tokens into a given Expression type. If a collection of Expression 972 types is given instead, this method will try to parse the token list into each one 973 of them, stopping at the first for which the parsing succeeds. 974 975 Args: 976 expression_types: The expression type(s) to try and parse the token list into. 977 raw_tokens: The list of tokens. 978 sql: The original SQL string, used to produce helpful debug messages. 979 980 Returns: 981 The target Expression. 982 """ 983 errors = [] 984 for expression_type in ensure_list(expression_types): 985 parser = self.EXPRESSION_PARSERS.get(expression_type) 986 if not parser: 987 raise TypeError(f"No parser registered for {expression_type}") 988 989 try: 990 return self._parse(parser, raw_tokens, sql) 991 except ParseError as e: 992 e.errors[0]["into_expression"] = expression_type 993 errors.append(e) 994 995 raise ParseError( 996 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 997 errors=merge_errors(errors), 998 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1035 def check_errors(self) -> None: 1036 """Logs or raises any found errors, depending on the chosen error level setting.""" 1037 if self.error_level == ErrorLevel.WARN: 1038 for error in self.errors: 1039 logger.error(str(error)) 1040 elif self.error_level == ErrorLevel.RAISE and self.errors: 1041 raise ParseError( 1042 concat_messages(self.errors, self.max_errors), 1043 errors=merge_errors(self.errors), 1044 )
Logs or raises any found errors, depending on the chosen error level setting.
1046 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1047 """ 1048 Appends an error in the list of recorded errors or raises it, depending on the chosen 1049 error level setting. 1050 """ 1051 token = token or self._curr or self._prev or Token.string("") 1052 start = token.start 1053 end = token.end + 1 1054 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1055 highlight = self.sql[start:end] 1056 end_context = self.sql[end : end + self.error_message_context] 1057 1058 error = ParseError.new( 1059 f"{message}. Line {token.line}, Col: {token.col}.\n" 1060 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1061 description=message, 1062 line=token.line, 1063 col=token.col, 1064 start_context=start_context, 1065 highlight=highlight, 1066 end_context=end_context, 1067 ) 1068 1069 if self.error_level == ErrorLevel.IMMEDIATE: 1070 raise error 1071 1072 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1074 def expression( 1075 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1076 ) -> E: 1077 """ 1078 Creates a new, validated Expression. 1079 1080 Args: 1081 exp_class: The expression class to instantiate. 1082 comments: An optional list of comments to attach to the expression. 1083 kwargs: The arguments to set for the expression along with their respective values. 1084 1085 Returns: 1086 The target expression. 1087 """ 1088 instance = exp_class(**kwargs) 1089 instance.add_comments(comments) if comments else self._add_comments(instance) 1090 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1097 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1098 """ 1099 Validates an Expression, making sure that all its mandatory arguments are set. 1100 1101 Args: 1102 expression: The expression to validate. 1103 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1104 1105 Returns: 1106 The validated expression. 1107 """ 1108 if self.error_level != ErrorLevel.IGNORE: 1109 for error_message in expression.error_messages(args): 1110 self.raise_error(error_message) 1111 1112 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.